123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336 |
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import cv2
- import numpy as np
- from keypoint_preprocess import get_affine_transform
def decode_image(im_file, im_info):
    """Read an image as RGB and record its shape in ``im_info``.

    Args:
        im_file (str|np.ndarray): path of an image file, or an image array.
            An array is used as-is (no decoding or color conversion).
        im_info (dict): info of image; updated in place.

    Returns:
        im (np.ndarray): the decoded (or passed-through) image
        im_info (dict): the same dict, with 'im_shape' set to the image's
            first two dimensions and 'scale_factor' reset to [1., 1.]

    Raises:
        ValueError: if the file content cannot be decoded as an image.
    """
    if isinstance(im_file, str):
        with open(im_file, 'rb') as f:
            im_read = f.read()
        data = np.frombuffer(im_read, dtype='uint8')
        im = cv2.imdecode(data, 1)  # flag 1: decode as 3-channel BGR
        if im is None:
            # imdecode signals undecodable data by returning None; fail here
            # with a clear message instead of inside cvtColor.
            raise ValueError(
                'Failed to decode image file: {}'.format(im_file))
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    else:
        im = im_file
    im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return im, im_info
class Resize(object):
    """Resize an image to a target size, optionally keeping aspect ratio.

    Args:
        target_size (int|list): target [height, width]; an int is expanded
            to a square [target_size, target_size]
        keep_ratio (bool): whether keep_ratio or not, default true
        interp (int): interpolation flag passed to cv2.resize
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): resized image
            im_info (dict): same dict, with 'im_shape' set to the resized
                [height, width] and 'scale_factor' to [scale_y, scale_x]
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        im_scale_y, im_scale_x = self.generate_scale(im)
        # dsize is None, so the output size is derived from fx / fy.
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, im):
        """Compute the resize ratios for ``im``.

        Only the first two dimensions of ``im`` are read, so 2-D (single
        channel) arrays are accepted as well as HWC arrays.

        Args:
            im (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_y: the resize ratio of Y
            im_scale_x: the resize ratio of X
        """
        origin_shape = im.shape[:2]
        if self.keep_ratio:
            im_size_min = np.min(origin_shape)
            im_size_max = np.max(origin_shape)
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            # Fit the short side to the smaller target first ...
            im_scale = float(target_size_min) / float(im_size_min)
            # ... and fall back to fitting the long side when that would
            # push the long side past the larger target.
            if np.round(im_scale * im_size_max) > target_size_max:
                im_scale = float(target_size_max) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / float(origin_shape[0])
            im_scale_x = resize_w / float(origin_shape[1])
        return im_scale_y, im_scale_x
class NormalizeImage(object):
    """Normalize a channel-last image: optional 1/255 scaling, then
    ``(im - mean) / std``.

    Args:
        mean (list): per-channel values subtracted from the image
        std (list): per-channel values the image is divided by
        is_scale (bool): whether need im / 255
    """

    def __init__(self, mean, std, is_scale=True):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray); never modified in place
            im_info (dict): info of image
        Returns:
            im (np.ndarray): normalized float32 image
            im_info (dict): info of image, returned unchanged
        """
        # Shape (1, 1, C) so the statistics broadcast over the last axis.
        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
        std = np.array(self.std)[np.newaxis, np.newaxis, :]
        if self.is_scale:
            # The division allocates a new array, so the in-place ops below
            # cannot reach the caller's data even when no cast was needed.
            im = im.astype(np.float32, copy=False) / 255.0
        else:
            # Force a copy: with copy=False a float32 input would be returned
            # as-is and then be overwritten by the in-place ops below.
            im = im.astype(np.float32)
        im -= mean
        im /= std
        return im, im_info
class Permute(object):
    """Reorder image axes from HWC to CHW.

    The constructor takes no arguments.
    """

    def __init__(self):
        super().__init__()

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image with three axes; the last axis is moved
                to the front
            im_info (dict): info of image
        Returns:
            im (np.ndarray): a fresh copy of the image with axes (2, 0, 1)
            im_info (dict): info of image, returned unchanged
        """
        chw_view = im.transpose(2, 0, 1)
        # Copy so the result owns its data rather than being a strided view.
        return chw_view.copy(), im_info
class PadStride(object):
    """Zero-pad a CHW image so height and width are multiples of a stride.

    Used for models with FPN, instead of PadBatch(pad_to_stride) in the
    original config.

    Args:
        stride (int): required divisor of the padded height and width; a
            value <= 0 disables padding
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image with shape (channels, height, width)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): float32 image padded with zeros at the bottom
                and right, or the input itself when padding is disabled
            im_info (dict): info of image, returned unchanged
        """
        stride = self.coarsest_stride
        if stride <= 0:
            return im, im_info

        channels, height, width = im.shape
        # Round each spatial side up to the next multiple of the stride.
        padded_h, padded_w = (
            int(np.ceil(float(side) / stride) * stride)
            for side in (height, width))
        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
        canvas[:, :height, :width] = im
        return canvas, im_info
class LetterBoxResize(object):
    def __init__(self, target_size):
        """
        Resize an image to fit inside the target size while keeping its
        aspect ratio, then pad the remainder with a constant color.
        Args:
            target_size (int|list): image target size as [height, width];
                an int is expanded to a square.
        """
        super().__init__()
        self.target_size = ([target_size, target_size]
                            if isinstance(target_size, int) else target_size)

    def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
        """Scale ``img`` to fit (height, width) and pad it with ``color``.

        Returns:
            tuple: (padded image, resize ratio, horizontal padding per side,
            vertical padding per side); the padding values are the
            unrounded halves of the leftover space.
        """
        src_h = img.shape[0]
        src_w = img.shape[1]
        # The smaller of the two ratios is the one that fits both sides.
        ratio = min(float(height) / src_h, float(width) / src_w)
        resized_w = round(src_w * ratio)
        resized_h = round(src_h * ratio)
        padw = (width - resized_w) / 2
        padh = (height - resized_h) / 2
        # The -0.1 / +0.1 offsets make a half-pixel leftover round down on
        # one side and up on the other.
        top = round(padh - 0.1)
        bottom = round(padh + 0.1)
        left = round(padw - 0.1)
        right = round(padw + 0.1)
        # cv2.resize takes the size as (width, height).
        img = cv2.resize(
            img, (resized_w, resized_h), interpolation=cv2.INTER_AREA)
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        return img, ratio, padw, padh

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): letterboxed image
            im_info (dict): same dict, with 'im_shape' set to the resized
                (pre-padding) [height, width] and 'scale_factor' to
                [ratio, ratio]
        """
        target = self.target_size
        assert len(target) == 2
        assert target[0] > 0 and target[1] > 0
        box_h, box_w = target
        src_h, src_w = im.shape[:2]
        im, ratio, _, _ = self.letterbox(im, height=box_h, width=box_w)
        im_info['im_shape'] = np.array(
            [round(src_h * ratio), round(src_w * ratio)], dtype=np.float32)
        im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
        return im, im_info
class Pad(object):
    def __init__(self, size, fill_value=None):
        """
        Pad image to a specified size.
        Args:
            size (int|list[int]): image target size as [height, width]; an
                int is expanded to a square
            fill_value (list[float]): rgb value of pad area, default
                (114.0, 114.0, 114.0) when None
        """
        super().__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        # A None sentinel gives every instance its own list; a list default
        # in the signature would be shared by all instances.
        self.fill_value = ([114.0, 114.0, 114.0]
                           if fill_value is None else fill_value)

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image whose first two axes are height, width
            im_info (dict): info of image
        Returns:
            im (np.ndarray): float32 image of the target size, with the
                input in the top-left corner and ``fill_value`` elsewhere
            im_info (dict): info of image, returned unchanged
        Raises:
            ValueError: if the image is larger than the target size.
        """
        im_h, im_w = im.shape[:2]
        h, w = self.size
        if h == im_h and w == im_w:
            return im.astype(np.float32), im_info
        if im_h > h or im_w > w:
            # The paste below cannot fit; report it explicitly instead of
            # surfacing a broadcasting error from the slice assignment.
            raise ValueError(
                'Image of size ({}, {}) does not fit into pad size '
                '({}, {})'.format(im_h, im_w, h, w))
        canvas = np.full(
            (h, w, 3),
            np.array(self.fill_value, dtype=np.float32),
            dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
        return canvas, im_info
class WarpAffine(object):
    """Warp an RGB image with an affine transform; the output is BGR.

    Args:
        keep_res (bool): if True, derive the output size from the image
            size and ``pad``; otherwise use ``input_h`` / ``input_w``
        pad (int): bit mask OR-ed into the image size when ``keep_res`` is
            True (output side = (side | pad) + 1)
        input_h (int): output height when ``keep_res`` is False
        input_w (int): output width when ``keep_res`` is False
        scale (float): stored on the instance; not read by ``__call__``
        shift (float): stored on the instance; not read by ``__call__``
    """

    def __init__(self,
                 keep_res=False,
                 pad=31,
                 input_h=512,
                 input_w=512,
                 scale=0.4,
                 shift=0.1):
        self.keep_res, self.pad = keep_res, pad
        self.input_h, self.input_w = input_h, input_w
        self.scale, self.shift = scale, shift

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray), converted RGB -> BGR first
            im_info (dict): info of image
        Returns:
            im (np.ndarray): warped image of size (input_h, input_w)
            im_info (dict): info of image, returned unchanged
        """
        bgr = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        src_h, src_w = bgr.shape[:2]
        if not self.keep_res:
            out_h, out_w = self.input_h, self.input_w
            extent = max(src_h, src_w) * 1.0
            center = np.array([src_w / 2., src_h / 2.], dtype=np.float32)
        else:
            out_h = (src_h | self.pad) + 1
            out_w = (src_w | self.pad) + 1
            extent = np.array([out_w, out_h], dtype=np.float32)
            center = np.array([src_w // 2, src_h // 2], dtype=np.float32)

        # NOTE(review): get_affine_transform comes from keypoint_preprocess;
        # presumably (center, scale, rotation, output_size) -> 2x3 matrix.
        matrix = get_affine_transform(center, extent, 0, [out_w, out_h])
        # Resize to the image's own (width, height), as the original did.
        bgr = cv2.resize(bgr, (src_w, src_h))
        warped = cv2.warpAffine(
            bgr, matrix, (out_w, out_h), flags=cv2.INTER_LINEAR)
        return warped, im_info
def preprocess(im, preprocess_ops):
    """Decode an image and run it through a sequence of preprocessing ops.

    Args:
        im (str|np.ndarray): image path or image array, handed to
            decode_image
        preprocess_ops (iterable): callables taking (im, im_info) and
            returning the updated (im, im_info); applied in order

    Returns:
        im: the image after the last op
        im_info: the info after the last op
    """
    im_info = dict(
        scale_factor=np.array([1., 1.], dtype=np.float32),
        im_shape=None)
    im, im_info = decode_image(im, im_info)
    for op in preprocess_ops:
        im, im_info = op(im, im_info)
    return im, im_info
|