123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- import logging
- import numpy as np
- import cv2
- __all__ = ['nms']
- logger = logging.getLogger(__name__)
- def box_flip(boxes, im_shape):
- im_width = im_shape[0][1]
- flipped_boxes = boxes.copy()
- flipped_boxes[:, 0::4] = im_width - boxes[:, 2::4] - 1
- flipped_boxes[:, 2::4] = im_width - boxes[:, 0::4] - 1
- return flipped_boxes
- def nms(dets, thresh):
- """
- refer to:
- https://github.com/facebookresearch/Detectron/blob/main/detectron/utils/cython_nms.pyx
- Apply classic DPM-style greedy NMS.
- """
- if dets.shape[0] == 0:
- return dets[[], :]
- scores = dets[:, 0]
- x1 = dets[:, 1]
- y1 = dets[:, 2]
- x2 = dets[:, 3]
- y2 = dets[:, 4]
- areas = (x2 - x1 + 1) * (y2 - y1 + 1)
- order = scores.argsort()[::-1]
- ndets = dets.shape[0]
- suppressed = np.zeros((ndets), dtype=np.int)
- # nominal indices
- # _i, _j
- # sorted indices
- # i, j
- # temp variables for box i's (the box currently under consideration)
- # ix1, iy1, ix2, iy2, iarea
- # variables for computing overlap with box j (lower scoring box)
- # xx1, yy1, xx2, yy2
- # w, h
- # inter, ovr
- for _i in range(ndets):
- i = order[_i]
- if suppressed[i] == 1:
- continue
- ix1 = x1[i]
- iy1 = y1[i]
- ix2 = x2[i]
- iy2 = y2[i]
- iarea = areas[i]
- for _j in range(_i + 1, ndets):
- j = order[_j]
- if suppressed[j] == 1:
- continue
- xx1 = max(ix1, x1[j])
- yy1 = max(iy1, y1[j])
- xx2 = min(ix2, x2[j])
- yy2 = min(iy2, y2[j])
- w = max(0.0, xx2 - xx1 + 1)
- h = max(0.0, yy2 - yy1 + 1)
- inter = w * h
- ovr = inter / (iarea + areas[j] - inter)
- if ovr >= thresh:
- suppressed[j] = 1
- keep = np.where(suppressed == 0)[0]
- dets = dets[keep, :]
- return dets
- def soft_nms(dets, sigma, thres):
- """
- refer to:
- https://github.com/facebookresearch/Detectron/blob/main/detectron/utils/cython_nms.pyx
- """
- dets_final = []
- while len(dets) > 0:
- maxpos = np.argmax(dets[:, 0])
- dets_final.append(dets[maxpos].copy())
- ts, tx1, ty1, tx2, ty2 = dets[maxpos]
- scores = dets[:, 0]
- # force remove bbox at maxpos
- scores[maxpos] = -1
- x1 = dets[:, 1]
- y1 = dets[:, 2]
- x2 = dets[:, 3]
- y2 = dets[:, 4]
- areas = (x2 - x1 + 1) * (y2 - y1 + 1)
- xx1 = np.maximum(tx1, x1)
- yy1 = np.maximum(ty1, y1)
- xx2 = np.minimum(tx2, x2)
- yy2 = np.minimum(ty2, y2)
- w = np.maximum(0.0, xx2 - xx1 + 1)
- h = np.maximum(0.0, yy2 - yy1 + 1)
- inter = w * h
- ovr = inter / (areas + areas[maxpos] - inter)
- weight = np.exp(-(ovr * ovr) / sigma)
- scores = scores * weight
- idx_keep = np.where(scores >= thres)
- dets[:, 0] = scores
- dets = dets[idx_keep]
- dets_final = np.array(dets_final).reshape(-1, 5)
- return dets_final
- def bbox_area(box):
- w = box[2] - box[0] + 1
- h = box[3] - box[1] + 1
- return w * h
- def bbox_overlaps(x, y):
- N = x.shape[0]
- K = y.shape[0]
- overlaps = np.zeros((N, K), dtype=np.float32)
- for k in range(K):
- y_area = bbox_area(y[k])
- for n in range(N):
- iw = min(x[n, 2], y[k, 2]) - max(x[n, 0], y[k, 0]) + 1
- if iw > 0:
- ih = min(x[n, 3], y[k, 3]) - max(x[n, 1], y[k, 1]) + 1
- if ih > 0:
- x_area = bbox_area(x[n])
- ua = x_area + y_area - iw * ih
- overlaps[n, k] = iw * ih / ua
- return overlaps
- def box_voting(nms_dets, dets, vote_thresh):
- top_dets = nms_dets.copy()
- top_boxes = nms_dets[:, 1:]
- all_boxes = dets[:, 1:]
- all_scores = dets[:, 0]
- top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes)
- for k in range(nms_dets.shape[0]):
- inds_to_vote = np.where(top_to_all_overlaps[k] >= vote_thresh)[0]
- boxes_to_vote = all_boxes[inds_to_vote, :]
- ws = all_scores[inds_to_vote]
- top_dets[k, 1:] = np.average(boxes_to_vote, axis=0, weights=ws)
- return top_dets
- def get_nms_result(boxes,
- scores,
- config,
- num_classes,
- background_label=0,
- labels=None):
- has_labels = labels is not None
- cls_boxes = [[] for _ in range(num_classes)]
- start_idx = 1 if background_label == 0 else 0
- for j in range(start_idx, num_classes):
- inds = np.where(labels == j)[0] if has_labels else np.where(
- scores[:, j] > config['score_thresh'])[0]
- scores_j = scores[inds] if has_labels else scores[inds, j]
- boxes_j = boxes[inds, :] if has_labels else boxes[inds, j * 4:(j + 1) *
- 4]
- dets_j = np.hstack((scores_j[:, np.newaxis], boxes_j)).astype(
- np.float32, copy=False)
- if config.get('use_soft_nms', False):
- nms_dets = soft_nms(dets_j, config['sigma'], config['nms_thresh'])
- else:
- nms_dets = nms(dets_j, config['nms_thresh'])
- if config.get('enable_voting', False):
- nms_dets = box_voting(nms_dets, dets_j, config['vote_thresh'])
- #add labels
- label = np.array([j for _ in range(len(nms_dets))])
- nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype(
- np.float32, copy=False)
- cls_boxes[j] = nms_dets
- # Limit to max_per_image detections **over all classes**
- image_scores = np.hstack(
- [cls_boxes[j][:, 1] for j in range(start_idx, num_classes)])
- if len(image_scores) > config['detections_per_im']:
- image_thresh = np.sort(image_scores)[-config['detections_per_im']]
- for j in range(start_idx, num_classes):
- keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
- cls_boxes[j] = cls_boxes[j][keep, :]
- im_results = np.vstack(
- [cls_boxes[j] for j in range(start_idx, num_classes)])
- return im_results
- def mstest_box_post_process(result, config, num_classes):
- """
- Multi-scale Test
- Only available for batch_size=1 now.
- """
- post_bbox = {}
- use_flip = False
- ms_boxes = []
- ms_scores = []
- im_shape = result['im_shape'][0]
- for k in result.keys():
- if 'bbox' in k:
- boxes = result[k][0]
- boxes = np.reshape(boxes, (-1, 4 * num_classes))
- scores = result['score' + k[4:]][0]
- if 'flip' in k:
- boxes = box_flip(boxes, im_shape)
- use_flip = True
- ms_boxes.append(boxes)
- ms_scores.append(scores)
- ms_boxes = np.concatenate(ms_boxes)
- ms_scores = np.concatenate(ms_scores)
- bbox_pred = get_nms_result(ms_boxes, ms_scores, config, num_classes)
- post_bbox.update({'bbox': (bbox_pred, [[len(bbox_pred)]])})
- if use_flip:
- bbox = bbox_pred[:, 2:]
- bbox_flip = np.append(
- bbox_pred[:, :2], box_flip(bbox, im_shape), axis=1)
- post_bbox.update({'bbox_flip': (bbox_flip, [[len(bbox_flip)]])})
- return post_bbox
- def mstest_mask_post_process(result, cfg):
- mask_list = []
- im_shape = result['im_shape'][0]
- M = cfg.FPNRoIAlign['mask_resolution']
- for k in result.keys():
- if 'mask' in k:
- masks = result[k][0]
- if len(masks.shape) != 4:
- masks = np.zeros((0, M, M))
- mask_list.append(masks)
- continue
- if 'flip' in k:
- masks = masks[:, :, :, ::-1]
- mask_list.append(masks)
- mask_pred = np.mean(mask_list, axis=0)
- return {'mask': (mask_pred, [[len(mask_pred)]])}
- def mask_encode(results, resolution, thresh_binarize=0.5):
- import pycocotools.mask as mask_util
- from ppdet.utils.coco_eval import expand_boxes
- scale = (resolution + 2.0) / resolution
- bboxes = results['bbox'][0]
- masks = results['mask'][0]
- lengths = results['mask'][1][0]
- im_shapes = results['im_shape'][0]
- segms = []
- if bboxes.shape == (1, 1) or bboxes is None:
- return segms
- if len(bboxes.tolist()) == 0:
- return segms
- s = 0
- # for each sample
- for i in range(len(lengths)):
- num = lengths[i]
- im_shape = im_shapes[i]
- bbox = bboxes[s:s + num][:, 2:]
- clsid_scores = bboxes[s:s + num][:, 0:2]
- mask = masks[s:s + num]
- s += num
- im_h = int(im_shape[0])
- im_w = int(im_shape[1])
- expand_bbox = expand_boxes(bbox, scale)
- expand_bbox = expand_bbox.astype(np.int32)
- padded_mask = np.zeros(
- (resolution + 2, resolution + 2), dtype=np.float32)
- for j in range(num):
- xmin, ymin, xmax, ymax = expand_bbox[j].tolist()
- clsid, score = clsid_scores[j].tolist()
- clsid = int(clsid)
- padded_mask[1:-1, 1:-1] = mask[j, clsid, :, :]
- w = xmax - xmin + 1
- h = ymax - ymin + 1
- w = np.maximum(w, 1)
- h = np.maximum(h, 1)
- resized_mask = cv2.resize(padded_mask, (w, h))
- resized_mask = np.array(
- resized_mask > thresh_binarize, dtype=np.uint8)
- im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
- x0 = min(max(xmin, 0), im_w)
- x1 = min(max(xmax + 1, 0), im_w)
- y0 = min(max(ymin, 0), im_h)
- y1 = min(max(ymax + 1, 0), im_h)
- im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
- x0 - xmin):(x1 - xmin)]
- segm = mask_util.encode(
- np.array(
- im_mask[:, :, np.newaxis], order='F'))[0]
- segms.append(segm)
- return segms
- def corner_post_process(results, config, num_classes):
- detections = results['bbox'][0]
- keep_inds = (detections[:, 1] > -1)
- detections = detections[keep_inds]
- labels = detections[:, 0]
- scores = detections[:, 1]
- boxes = detections[:, 2:6]
- cls_boxes = get_nms_result(
- boxes, scores, config, num_classes, background_label=-1, labels=labels)
- results.update({'bbox': (cls_boxes, [[len(cls_boxes)]])})
|