123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from PIL import Image
- import cv2
- import numpy as np
def decode_image(im_file, im_info):
    """Load an image, convert it from BGR to RGB and record its size.

    Args:
        im_file (str/np.ndarray): path of an image file, or an array that
            was already read by cv2 (treated as BGR)
        im_info (dict): info of image; updated in place
    Returns:
        im (np.ndarray): the RGB image
        im_info (dict): the same dict, with 'origin_shape' and
            'resize_shape' both set to the first two dimensions of the image
    """
    if not isinstance(im_file, str):
        # Caller handed over an array that is already decoded.
        bgr = im_file
    else:
        # Read the raw bytes ourselves and let cv2 decode them from memory.
        with open(im_file, 'rb') as stream:
            raw = np.frombuffer(stream.read(), dtype='uint8')
        bgr = cv2.imdecode(raw, 1)  # BGR mode, but need RGB mode

    im = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    height_width = im.shape[:2]
    im_info['origin_shape'] = height_width
    im_info['resize_shape'] = height_width
    return im, im_info
class Resize(object):
    """Resize an image according to target_size and max_size.

    Args:
        target_size (int): the target size of image
        max_size (int): the max size of image; when non-zero the aspect
            ratio is kept and the longer side is capped at this value,
            when 0 both sides are stretched to target_size
        use_cv2 (bool): whether to resize with cv2 (PIL is used otherwise)
        image_shape (list): input shape of model; when it is not None and
            max_size is non-zero, the resized image is zero-padded to
            (max_size, max_size)
        interp (int): method of resize
        resize_box (bool): accepted for interface compatibility; this
            class does not read it
    """

    def __init__(self,
                 target_size=800,
                 max_size=1333,
                 use_cv2=True,
                 image_shape=None,
                 interp=cv2.INTER_LINEAR,
                 resize_box=False):
        self.target_size = target_size
        self.max_size = max_size
        self.image_shape = image_shape
        self.use_cv2 = use_cv2
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray), indexed as (H, W, C)
            im_info (dict): info of image; updated in place
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image, with 'resize_shape'
                ([height, width] as floats) and 'scale'
                ([im_scale_x, im_scale_y]) filled in
        Raises:
            TypeError: if use_cv2 is False while max_size is non-zero
        """
        im_channel = im.shape[2]
        im_scale_x, im_scale_y = self.generate_scale(im)
        # im.shape is (height, width): the height follows the Y ratio and
        # the width follows the X ratio.
        im_info['resize_shape'] = [
            im_scale_y * float(im.shape[0]), im_scale_x * float(im.shape[1])
        ]
        if self.use_cv2:
            im = cv2.resize(
                im,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
        else:
            if self.max_size != 0:
                raise TypeError(
                    'If you set max_size to cap the maximum size of image, '
                    'please set use_cv2 to True to resize the image.')
            resize_w = int(im_scale_x * float(im.shape[1]))
            resize_h = int(im_scale_y * float(im.shape[0]))
            im = Image.fromarray(im.astype('uint8'))
            # NOTE(review): self.interp defaults to a cv2 constant but is
            # handed to PIL here -- confirm both libraries agree on its
            # meaning before relying on this branch.
            im = im.resize((resize_w, resize_h), self.interp)
            im = np.array(im)

        # padding im when image_shape fixed by infer_cfg.yml
        if self.max_size != 0 and self.image_shape is not None:
            padding_im = np.zeros(
                (self.max_size, self.max_size, im_channel), dtype=np.float32)
            im_h, im_w = im.shape[:2]
            padding_im[:im_h, :im_w, :] = im
            im = padding_im

        im_info['scale'] = [im_scale_x, im_scale_y]
        return im, im_info

    def generate_scale(self, im):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_x: the resize ratio of X
            im_scale_y: the resize ratio of Y
        """
        origin_shape = im.shape[:2]
        if self.max_size != 0:
            # Keep the aspect ratio: scale the shorter side to target_size
            # unless that would push the longer side past max_size.
            im_size_min = np.min(origin_shape[0:2])
            im_size_max = np.max(origin_shape[0:2])
            im_scale = float(self.target_size) / float(im_size_min)
            if np.round(im_scale * im_size_max) > self.max_size:
                im_scale = float(self.max_size) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            # No cap: stretch each side independently to target_size.
            im_scale_x = float(self.target_size) / float(origin_shape[1])
            im_scale_y = float(self.target_size) / float(origin_shape[0])
        return im_scale_x, im_scale_y
class Normalize(object):
    """Standardize an image channel by channel.

    Args:
        mean (list): per-channel value subtracted from the image
        std (list): per-channel value the image is divided by
        is_scale (bool): whether to divide the image by 255 first
        is_channel_first (bool): if True: image shape is CHW, else: HWC
    """

    def __init__(self, mean, std, is_scale=True, is_channel_first=False):
        self.mean, self.std = mean, std
        self.is_scale, self.is_channel_first = is_scale, is_channel_first

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): float32 image after optional /255 scaling,
                mean subtraction and std division
            im_info (dict): info of image, returned untouched
        """
        # Index that adds two unit axes so the 1-D statistics line up with
        # the channel axis of the image.
        if self.is_channel_first:
            lift = (slice(None), None, None)
        else:
            lift = (None, None, slice(None))

        im = im.astype(np.float32, copy=False)
        channel_mean = np.array(self.mean)[lift]
        channel_std = np.array(self.std)[lift]

        if self.is_scale:
            im = im / 255.0
        # In-place updates keep the float32 dtype of `im`.
        im -= channel_mean
        im /= channel_std
        return im, im_info
class Permute(object):
    """Reorder the axes and/or the channels of an image.

    Args:
        to_bgr (bool): whether convert RGB to BGR
        channel_first (bool): whether convert HWC to CHW
    """

    def __init__(self, to_bgr=False, channel_first=True):
        self.to_bgr = to_bgr
        self.channel_first = channel_first

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray) in HWC layout
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray); CHW when
                channel_first is True, HWC otherwise
            im_info (dict): info of image, returned untouched
        """
        if self.channel_first:
            im = im.transpose((2, 0, 1)).copy()
        if self.to_bgr:
            # The channel axis is axis 0 only after the transpose above;
            # for an image left in HWC layout it is the last axis.
            if self.channel_first:
                im = im[[2, 1, 0], :, :]
            else:
                im = im[:, :, [2, 1, 0]]
        return im, im_info
class PadStride(object):
    """ padding image for model with FPN
    Args:
        stride (int): model with FPN need image shape % stride == 0;
            0 disables padding
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray) in CHW layout
            im_info (dict): info of image; updated in place when padding
                is applied
        Returns:
            im (np.ndarray): processed image (np.ndarray); zero-padded at
                the bottom/right so that height and width are multiples
                of the stride, or the input itself when the stride is 0
            im_info (dict): info of processed image; 'pad_shape' holds the
                padded (height, width) when padding is applied
        """
        coarsest_stride = self.coarsest_stride
        if coarsest_stride == 0:
            # Always hand back the (im, im_info) pair so callers can
            # unpack the result the same way as for every other operator.
            return im, im_info
        im_c, im_h, im_w = im.shape
        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = im
        im_info['pad_shape'] = padding_im.shape[1:]
        return padding_im, im_info
def preprocess(im, preprocess_ops):
    """Decode an image and run it through a sequence of operators.

    Args:
        im (str/np.ndarray): path of image/ np.ndarray read by cv2
        preprocess_ops (list): callables invoked in order as
            ``op(im, im_info)``, each returning the updated
            ``(im, im_info)`` pair
    Returns:
        im (np.ndarray): float32 array holding the processed image with a
            new leading axis of length 1
        im_info (dict): info of processed image
    """
    im_info = {
        'scale': [1., 1.],
        'origin_shape': None,
        'resize_shape': None,
        'pad_shape': None,
    }
    im, im_info = decode_image(im, im_info)
    for operator in preprocess_ops:
        im, im_info = operator(im, im_info)
    # Wrap the single image in a 1-tuple to add the leading axis.
    im = np.array((im, )).astype('float32')
    return im, im_info
|