# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import base64

import cv2
import numpy as np
from PIL import Image, ImageDraw

import paddle.fluid as fluid


def create_inputs(im, im_info):
    """Generate the input feed dict for the detection model.

    Args:
        im (np.ndarray): preprocessed image data
        im_info (dict): image meta info with keys 'origin_shape',
            'resize_shape', 'pad_shape' and 'scale'
    Returns:
        inputs (dict): input feed of the model
    """
    inputs = {}
    inputs['image'] = im
    origin_shape = list(im_info['origin_shape'])
    resize_shape = list(im_info['resize_shape'])
    pad_shape = list(im_info['pad_shape']) if im_info[
        'pad_shape'] is not None else list(im_info['resize_shape'])
    scale_x, scale_y = im_info['scale']
    # uniform scaling is assumed, so only scale_x is fed to the model
    scale = scale_x
    im_info = np.array([resize_shape + [scale]]).astype('float32')
    inputs['im_info'] = im_info
    return inputs
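
# Illustrative usage (a sketch with made-up values; the real `im_info` dict is
# produced by the preprocessing pipeline, which is not part of this file, and
# 'resize_shape' is assumed to hold [resize_h, resize_w]):
#
#   im = np.random.rand(1, 3, 608, 608).astype('float32')
#   im_info = {
#       'origin_shape': (480, 640),
#       'resize_shape': [608, 608],
#       'pad_shape': None,
#       'scale': (0.95, 0.95),
#   }
#   inputs = create_inputs(im, im_info)
#   # inputs['image'].shape   -> (1, 3, 608, 608)
#   # inputs['im_info']       -> shape (1, 3): [resize_h, resize_w, scale]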


def visualize_box_mask(im,
                       results,
                       labels=None,
                       mask_resolution=14,
                       threshold=0.5):
    """Visualize detection results (boxes, masks or segments) on an image.

    Args:
        im (str/np.ndarray): path of image, or np.ndarray read by cv2 (BGR)
        results (dict): includes 'boxes': np.ndarray: shape:[N,6], N: number of boxes,
                        matrix element: [class, score, x_min, y_min, x_max, y_max]
                        MaskRCNN's results include 'masks': np.ndarray:
                        shape: [N, class_num, mask_resolution, mask_resolution]
        labels (list): labels: ['class1', ..., 'classn']; the COCO label list
                       is used when not given
        mask_resolution (int): shape of a mask is [mask_resolution, mask_resolution]
        threshold (float): threshold of score
    Returns:
        im (PIL.Image.Image): visualized image
    """
    if not labels:
        labels = [
            'background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
            'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush'
        ]
    if isinstance(im, str):
        im = Image.open(im).convert('RGB')
    else:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = Image.fromarray(im)
    if 'masks' in results and 'boxes' in results:
        im = draw_mask(
            im,
            results['boxes'],
            results['masks'],
            labels,
            resolution=mask_resolution)
    if 'boxes' in results:
        im = draw_box(im, results['boxes'], labels)
    if 'segm' in results:
        im = draw_segm(
            im,
            results['segm'],
            results['label'],
            results['score'],
            labels,
            threshold=threshold)
    return im
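
# Illustrative usage (a sketch; the box below is a made-up value shaped the way
# the docstring describes, not real model output, and 'demo.jpg' is a placeholder):
#
#   results = {
#       'boxes': np.array([[1., 0.92, 50., 60., 200., 220.]], dtype='float32')
#   }
#   vis = visualize_box_mask('demo.jpg', results)
#   vis.save('demo_vis.jpg')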


def get_color_map_list(num_classes):
    """Build one RGB color per class by spreading the bits of the class id
    over the R/G/B channels (the PASCAL VOC style color map).

    Args:
        num_classes (int): number of classes
    Returns:
        color_map (list): RGB color list, one [r, g, b] triple per class
    """
    color_map = num_classes * [0, 0, 0]
    for i in range(0, num_classes):
        j = 0
        lab = i
        while lab:
            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
            j += 1
            lab >>= 3
    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
    return color_map
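
# Illustrative output for the first few class ids:
#
#   get_color_map_list(4)
#   # -> [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0]]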


def expand_boxes(boxes, scale=0.0):
    """Expand boxes around their centers by the given scale factor.

    Args:
        boxes (np.ndarray): shape:[N,4], N: number of boxes,
                            matrix element: [x_min, y_min, x_max, y_max]
        scale (float): scale factor applied to the box width and height
    Returns:
        boxes_exp (np.ndarray): expanded boxes
    """
    w_half = (boxes[:, 2] - boxes[:, 0]) * .5
    h_half = (boxes[:, 3] - boxes[:, 1]) * .5
    x_c = (boxes[:, 2] + boxes[:, 0]) * .5
    y_c = (boxes[:, 3] + boxes[:, 1]) * .5
    w_half *= scale
    h_half *= scale
    boxes_exp = np.zeros(boxes.shape)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp
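
# Worked example: a 10x10 box centered at (5, 5), expanded with scale=2.0,
# keeps its center while its half-width and half-height double:
#
#   expand_boxes(np.array([[0., 0., 10., 10.]]), scale=2.0)
#   # -> array([[-5., -5., 15., 15.]])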


def draw_mask(im, np_boxes, np_masks, labels, resolution=14, threshold=0.5):
    """Paste per-box mask predictions back onto the image and blend them with
    class colors.

    Args:
        im (PIL.Image.Image): PIL image
        np_boxes (np.ndarray): shape:[N,6], N: number of boxes,
                               matrix element: [class, score, x_min, y_min, x_max, y_max]
        np_masks (np.ndarray): shape:[N, class_num, resolution, resolution]
        labels (list): labels: ['class1', ..., 'classn']
        resolution (int): shape of a mask is [resolution, resolution]
        threshold (float): threshold of mask
    Returns:
        im (PIL.Image.Image): visualized image
    """
    color_list = get_color_map_list(len(labels))
    scale = (resolution + 2.0) / resolution
    im_w, im_h = im.size
    w_ratio = 0.4
    alpha = 0.7
    im = np.array(im).astype('float32')
    rects = np_boxes[:, 2:]
    expand_rects = expand_boxes(rects, scale)
    expand_rects = expand_rects.astype(np.int32)
    clsid_scores = np_boxes[:, 0:2]
    padded_mask = np.zeros((resolution + 2, resolution + 2), dtype=np.float32)
    clsid2color = {}
    for idx in range(len(np_boxes)):
        clsid, score = clsid_scores[idx].tolist()
        clsid = int(clsid)
        xmin, ymin, xmax, ymax = expand_rects[idx].tolist()
        w = xmax - xmin + 1
        h = ymax - ymin + 1
        w = np.maximum(w, 1)
        h = np.maximum(h, 1)
        # resize the low-resolution mask (padded by one pixel on each side to
        # avoid border artifacts) to the expanded box size and binarize it
        padded_mask[1:-1, 1:-1] = np_masks[idx, int(clsid), :, :]
        resized_mask = cv2.resize(padded_mask, (w, h))
        resized_mask = np.array(resized_mask > threshold, dtype=np.uint8)
        # clip the box to the image and paste the mask into a full-size canvas
        x0 = min(max(xmin, 0), im_w)
        x1 = min(max(xmax + 1, 0), im_w)
        y0 = min(max(ymin, 0), im_h)
        y1 = min(max(ymax + 1, 0), im_h)
        im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
        im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
            x0 - xmin):(x1 - xmin)]
        if clsid not in clsid2color:
            clsid2color[clsid] = color_list[clsid]
        color_mask = clsid2color[clsid]
        # lighten the class color, then alpha-blend it over the masked pixels
        for c in range(3):
            color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255
        idx = np.nonzero(im_mask)
        color_mask = np.array(color_mask)
        im[idx[0], idx[1], :] *= 1.0 - alpha
        im[idx[0], idx[1], :] += alpha * color_mask
    return Image.fromarray(im.astype('uint8'))
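
# Illustrative call (shapes only, with made-up data; this helper is normally
# reached through visualize_box_mask rather than called directly):
#
#   labels = ['background', 'person']  # len(labels) must cover all class ids
#   np_boxes = np.array([[1., 0.9, 40., 40., 120., 160.]], dtype='float32')
#   np_masks = np.random.rand(1, 2, 14, 14).astype('float32')
#   vis = draw_mask(Image.new('RGB', (320, 240)), np_boxes, np_masks, labels)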


def draw_box(im, np_boxes, labels):
    """Draw bounding boxes and class/score labels on an image.

    Args:
        im (PIL.Image.Image): PIL image
        np_boxes (np.ndarray): shape:[N,6], N: number of boxes,
                               matrix element: [class, score, x_min, y_min, x_max, y_max]
        labels (list): labels: ['class1', ..., 'classn']
    Returns:
        im (PIL.Image.Image): visualized image
    """
    draw_thickness = min(im.size) // 320
    draw = ImageDraw.Draw(im)
    clsid2color = {}
    color_list = get_color_map_list(len(labels))
    for dt in np_boxes:
        clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
        xmin, ymin, xmax, ymax = bbox
        w = xmax - xmin
        h = ymax - ymin
        if clsid not in clsid2color:
            clsid2color[clsid] = color_list[clsid]
        color = tuple(clsid2color[clsid])
        # draw bbox
        draw.line(
            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
             (xmin, ymin)],
            width=draw_thickness,
            fill=color)
        # draw label
        text = "{} {:.4f}".format(labels[clsid], score)
        tw, th = draw.textsize(text)
        draw.rectangle(
            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
    return im
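
# Illustrative call (made-up box; class id 1 maps to 'person' in the default
# COCO label list used by visualize_box_mask, and 'demo.jpg' is a placeholder):
#
#   labels = ['background', 'person']
#   np_boxes = np.array([[1., 0.87, 60., 40., 220., 300.]], dtype='float32')
#   vis = draw_box(Image.open('demo.jpg').convert('RGB'), np_boxes, labels)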


def draw_segm(im,
              np_segms,
              np_label,
              np_score,
              labels,
              threshold=0.5,
              alpha=0.7):
    """Draw instance segmentation results on an image.

    Only pixels covered by 'person' instances (label id 0) whose score exceeds
    the threshold are kept; everything else is blacked out.
    """
    mask_color_id = 0
    w_ratio = .4
    color_list = get_color_map_list(len(labels))
    im = np.array(im).astype('float32')
    clsid2color = {}
    np_segms = np_segms.astype(np.uint8)
    # select person instances (label id 0) above the score threshold
    index = np.where(np_label == 0)[0]
    index = index[np_score[index] > threshold]
    person_segms = np_segms[index]
    # merge all person masks into one binary mask and apply it to the image
    person_mask = np.sum(person_segms, axis=0)
    person_mask[person_mask > 1] = 1
    person_mask = np.expand_dims(person_mask, axis=2)
    person_mask = np.repeat(person_mask, 3, axis=2)
    im = im * person_mask
    return Image.fromarray(im.astype('uint8'))
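
# Illustrative call (made-up SOLOv2-style output with a single instance; the
# person class is assumed to be label id 0 here):
#
#   np_segms = np.zeros((1, 240, 320), dtype='uint8')
#   np_segms[0, 50:200, 80:240] = 1
#   vis = draw_segm(Image.new('RGB', (320, 240), 'white'), np_segms,
#                   np_label=np.array([0]), np_score=np.array([0.9]),
#                   labels=['person'])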


def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3):
    """Set up AnalysisConfig and create an AnalysisPredictor.

    Args:
        model_dir (str): root path of __model__ and __params__
        run_mode (str): 'fluid' for the plain executor, or 'trt_fp32'/'trt_fp16'
            to run the TensorRT subgraph engine
        batch_size (int): max batch size for the TensorRT engine
        use_gpu (bool): whether to use GPU
        min_subgraph_size (int): minimum subgraph size handed to TensorRT
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predicting with TensorRT requires use_gpu == True.
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu == True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    if run_mode == 'trt_int8':
        raise ValueError("TensorRT int8 mode is not supported now, "
                         "please use trt_fp32 or trt_fp16 instead.")
    precision_map = {
        'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
        'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
        'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
    }
    config = fluid.core.AnalysisConfig(
        os.path.join(model_dir, '__model__'),
        os.path.join(model_dir, '__params__'))
    if use_gpu:
        # initial GPU memory (MB), device ID
        config.enable_use_gpu(100, 0)
        # optimize graph and fuse ops
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(
            workspace_size=1 << 10,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=False)
    # disable printed logs during prediction
    config.disable_glog_info()
    # enable memory optimization
    config.enable_memory_optim()
    # disable feed/fetch ops, required by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = fluid.core.create_paddle_predictor(config)
    return predictor
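
# Illustrative inference loop (a sketch using the Paddle 1.x zero-copy API,
# which matches switch_use_feed_fetch_ops(False) above; the model path and the
# `im`/`im_info` inputs are placeholders):
#
#   predictor = load_predictor('output/inference_model', use_gpu=False)
#   inputs = create_inputs(im, im_info)
#   for name in predictor.get_input_names():
#       tensor = predictor.get_input_tensor(name)
#       tensor.copy_from_cpu(inputs[name])
#   predictor.zero_copy_run()
#   output_names = predictor.get_output_names()
#   np_boxes = predictor.get_output_tensor(output_names[0]).copy_to_cpu()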


def cv2_to_base64(image):
    """Encode a BGR np.ndarray image as a base64 JPEG string."""
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tobytes()).decode('utf8')


def base64_to_cv2(b64str):
    """Decode a base64 string back into a BGR np.ndarray image."""
    data = base64.b64decode(b64str.encode('utf8'))
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
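
# Round-trip example (a sketch; JPEG encoding is lossy, so the decoded image is
# close to, but not necessarily bit-identical with, the original):
#
#   img = np.zeros((4, 4, 3), dtype='uint8')
#   restored = base64_to_cv2(cv2_to_base64(img))
#   assert restored.shape == img.shape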