processor.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from PIL import Image
  15. import cv2
  16. import numpy as np
  17. def decode_image(im_file, im_info):
  18. """read rgb image
  19. Args:
  20. im_file (str/np.ndarray): path of image/ np.ndarray read by cv2
  21. im_info (dict): info of image
  22. Returns:
  23. im (np.ndarray): processed image (np.ndarray)
  24. im_info (dict): info of processed image
  25. """
  26. if isinstance(im_file, str):
  27. with open(im_file, 'rb') as f:
  28. im_read = f.read()
  29. data = np.frombuffer(im_read, dtype='uint8')
  30. im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
  31. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  32. im_info['origin_shape'] = im.shape[:2]
  33. im_info['resize_shape'] = im.shape[:2]
  34. else:
  35. im = im_file
  36. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  37. im_info['origin_shape'] = im.shape[:2]
  38. im_info['resize_shape'] = im.shape[:2]
  39. return im, im_info
  40. class Resize(object):
  41. """resize image by target_size and max_size
  42. Args:
  43. arch (str): model type
  44. target_size (int): the target size of image
  45. max_size (int): the max size of image
  46. use_cv2 (bool): whether us cv2
  47. image_shape (list): input shape of model
  48. interp (int): method of resize
  49. """
  50. def __init__(self,
  51. target_size=800,
  52. max_size=1333,
  53. use_cv2=True,
  54. image_shape=None,
  55. interp=cv2.INTER_LINEAR,
  56. resize_box=False):
  57. self.target_size = target_size
  58. self.max_size = max_size
  59. self.image_shape = image_shape
  60. self.use_cv2 = use_cv2
  61. self.interp = interp
  62. def __call__(self, im, im_info):
  63. """
  64. Args:
  65. im (np.ndarray): image (np.ndarray)
  66. im_info (dict): info of image
  67. Returns:
  68. im (np.ndarray): processed image (np.ndarray)
  69. im_info (dict): info of processed image
  70. """
  71. im_channel = im.shape[2]
  72. im_scale_x, im_scale_y = self.generate_scale(im)
  73. im_info['resize_shape'] = [
  74. im_scale_x * float(im.shape[0]), im_scale_y * float(im.shape[1])
  75. ]
  76. if self.use_cv2:
  77. im = cv2.resize(
  78. im,
  79. None,
  80. None,
  81. fx=im_scale_x,
  82. fy=im_scale_y,
  83. interpolation=self.interp)
  84. else:
  85. resize_w = int(im_scale_x * float(im.shape[1]))
  86. resize_h = int(im_scale_y * float(im.shape[0]))
  87. if self.max_size != 0:
  88. raise TypeError(
  89. 'If you set max_size to cap the maximum size of image,'
  90. 'please set use_cv2 to True to resize the image.')
  91. im = im.astype('uint8')
  92. im = Image.fromarray(im)
  93. im = im.resize((int(resize_w), int(resize_h)), self.interp)
  94. im = np.array(im)
  95. # padding im when image_shape fixed by infer_cfg.yml
  96. if self.max_size != 0 and self.image_shape is not None:
  97. padding_im = np.zeros(
  98. (self.max_size, self.max_size, im_channel), dtype=np.float32)
  99. im_h, im_w = im.shape[:2]
  100. padding_im[:im_h, :im_w, :] = im
  101. im = padding_im
  102. im_info['scale'] = [im_scale_x, im_scale_y]
  103. return im, im_info
  104. def generate_scale(self, im):
  105. """
  106. Args:
  107. im (np.ndarray): image (np.ndarray)
  108. Returns:
  109. im_scale_x: the resize ratio of X
  110. im_scale_y: the resize ratio of Y
  111. """
  112. origin_shape = im.shape[:2]
  113. im_c = im.shape[2]
  114. if self.max_size != 0:
  115. im_size_min = np.min(origin_shape[0:2])
  116. im_size_max = np.max(origin_shape[0:2])
  117. im_scale = float(self.target_size) / float(im_size_min)
  118. if np.round(im_scale * im_size_max) > self.max_size:
  119. im_scale = float(self.max_size) / float(im_size_max)
  120. im_scale_x = im_scale
  121. im_scale_y = im_scale
  122. else:
  123. im_scale_x = float(self.target_size) / float(origin_shape[1])
  124. im_scale_y = float(self.target_size) / float(origin_shape[0])
  125. return im_scale_x, im_scale_y
  126. class Normalize(object):
  127. """normalize image
  128. Args:
  129. mean (list): im - mean
  130. std (list): im / std
  131. is_scale (bool): whether need im / 255
  132. is_channel_first (bool): if True: image shape is CHW, else: HWC
  133. """
  134. def __init__(self, mean, std, is_scale=True, is_channel_first=False):
  135. self.mean = mean
  136. self.std = std
  137. self.is_scale = is_scale
  138. self.is_channel_first = is_channel_first
  139. def __call__(self, im, im_info):
  140. """
  141. Args:
  142. im (np.ndarray): image (np.ndarray)
  143. im_info (dict): info of image
  144. Returns:
  145. im (np.ndarray): processed image (np.ndarray)
  146. im_info (dict): info of processed image
  147. """
  148. im = im.astype(np.float32, copy=False)
  149. if self.is_channel_first:
  150. mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
  151. std = np.array(self.std)[:, np.newaxis, np.newaxis]
  152. else:
  153. mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
  154. std = np.array(self.std)[np.newaxis, np.newaxis, :]
  155. if self.is_scale:
  156. im = im / 255.0
  157. im -= mean
  158. im /= std
  159. return im, im_info
  160. class Permute(object):
  161. """permute image
  162. Args:
  163. to_bgr (bool): whether convert RGB to BGR
  164. channel_first (bool): whether convert HWC to CHW
  165. """
  166. def __init__(self, to_bgr=False, channel_first=True):
  167. self.to_bgr = to_bgr
  168. self.channel_first = channel_first
  169. def __call__(self, im, im_info):
  170. """
  171. Args:
  172. im (np.ndarray): image (np.ndarray)
  173. im_info (dict): info of image
  174. Returns:
  175. im (np.ndarray): processed image (np.ndarray)
  176. im_info (dict): info of processed image
  177. """
  178. if self.channel_first:
  179. im = im.transpose((2, 0, 1)).copy()
  180. if self.to_bgr:
  181. im = im[[2, 1, 0], :, :]
  182. return im, im_info
  183. class PadStride(object):
  184. """ padding image for model with FPN
  185. Args:
  186. stride (bool): model with FPN need image shape % stride == 0
  187. """
  188. def __init__(self, stride=0):
  189. self.coarsest_stride = stride
  190. def __call__(self, im, im_info):
  191. """
  192. Args:
  193. im (np.ndarray): image (np.ndarray)
  194. im_info (dict): info of image
  195. Returns:
  196. im (np.ndarray): processed image (np.ndarray)
  197. im_info (dict): info of processed image
  198. """
  199. coarsest_stride = self.coarsest_stride
  200. if coarsest_stride == 0:
  201. return im
  202. im_c, im_h, im_w = im.shape
  203. pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
  204. pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
  205. padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
  206. padding_im[:, :im_h, :im_w] = im
  207. im_info['pad_shape'] = padding_im.shape[1:]
  208. return padding_im, im_info
  209. def preprocess(im, preprocess_ops):
  210. # process image by preprocess_ops
  211. im_info = {
  212. 'scale': [1., 1.],
  213. 'origin_shape': None,
  214. 'resize_shape': None,
  215. 'pad_shape': None,
  216. }
  217. im, im_info = decode_image(im, im_info)
  218. count = 0
  219. for operator in preprocess_ops:
  220. count += 1
  221. im, im_info = operator(im, im_info)
  222. im = np.array((im, )).astype('float32')
  223. return im, im_info