123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299 |
- import numpy as np
- from sklearn.utils.linear_assignment_ import linear_assignment
- import copy
- from sklearn.metrics.pairwise import cosine_similarity as cosine
class Tracker(object):
    """Frame-to-frame tracker with a two-stage association and an embedding bank.

    Every detection / track is a plain ``dict``.  The keys read by this class
    are ``'score'``, ``'bbox'`` (``[x1, y1, x2, y2]``), ``'class'``, ``'ct'``
    (centre), ``'tracking'`` (offset added to ``'ct'``) and, optionally,
    ``'embedding'``.  The keys written are ``'tracking_id'``, ``'age'`` and
    ``'active'``.

    ``opt`` must expose the attributes read below: ``new_thresh``,
    ``track_thresh``, ``overlap_thresh``, ``dataset``, ``hungarian``,
    ``public_det``, ``window_size`` and ``max_age``.

    Tracking ids are 1-based; row ``tracking_id - 1`` of ``embedding_bank``,
    ``cat_bank`` and ``tracklet_ages`` belongs to that id.
    """

    def __init__(self, opt):
        """Store the options object and initialise all tracking state."""
        self.opt = opt
        self.reset()
        self.nID = 10000
        # Weight of the newest embedding in the running average kept per id.
        self.alpha = 0.1

    def init_track(self, results):
        """Start one track for every item whose score exceeds ``opt.new_thresh``.

        Accepted items are mutated in place (``'active'``, ``'age'``,
        ``'tracking_id'`` and, when absent, ``'ct'`` computed as the box
        centre) and appended to ``self.tracks``.  The embedding and category
        banks are zeroed afterwards.
        """
        for item in results:
            if item['score'] > self.opt.new_thresh:
                self.id_count += 1
                # active and age are never used in the paper
                item['active'] = 1
                item['age'] = 1
                item['tracking_id'] = self.id_count
                if 'ct' not in item:
                    bbox = item['bbox']
                    item['ct'] = [(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2]
                self.tracks.append(item)
        # NOTE(review): the source lost its indentation; the bank reset is
        # placed after the loop so that it runs exactly once per call.
        self.nID = 10000
        self.embedding_bank = np.zeros((self.nID, 128))
        # Builtin ``int`` instead of the ``np.int`` alias removed in NumPy 1.24.
        self.cat_bank = np.zeros((self.nID), dtype=int)

    def reset(self):
        """Drop every track and zero the id counter and all per-id banks."""
        self.id_count = 0
        self.nID = 10000
        self.tracks = []
        self.embedding_bank = np.zeros((self.nID, 128))
        self.cat_bank = np.zeros((self.nID), dtype=int)
        self.tracklet_ages = np.zeros((self.nID), dtype=int)
        # Ids that already own a detection in the frame being processed.
        self.alive = []

    def step(self, results_with_low, public_det=None):
        """Associate the detections of one frame with the existing tracks.

        1. Detections with ``score >= opt.track_thresh`` are matched against
           all tracks (Hungarian or greedy, chosen by ``opt.hungarian``).
        2. Unmatched detections above ``opt.new_thresh`` either re-use the id
           of a similar stored embedding (see ``get_similarity``) or receive
           a new id.  With ``opt.public_det`` new ids are only given to
           detections lying close to an entry of ``public_det``.
        3. Detections with ``score < opt.track_thresh`` are greedily matched
           against the tracks that are still unmatched and have
           ``active > 0``.
        4. Tracks that remain unmatched are carried over unchanged while
           ``age < opt.max_age``.

        :param results_with_low: list of detection dicts (mutated in place)
        :param public_det: list of dicts with a ``'ct'`` entry; only read
            when ``opt.public_det`` is truthy
        :return: list of track dicts for this frame; it also becomes
            ``self.tracks``.  The ``'embedding'`` entry is removed from every
            returned dict.
        """
        opt = self.opt
        results = [item for item in results_with_low
                   if item['score'] >= opt.track_thresh]

        # ---- first association ------------------------------------------
        self.alive = []
        dist, dets, item_size = self._association_cost(
            results, self.tracks,
            iou_thresh=opt.overlap_thresh,
            match_class=(opt.dataset != 'youtube_vis'))

        if opt.hungarian:
            dist[dist > 1e18] = 1e18
            matched_indices = linear_assignment(dist)
        else:
            # greedy_assignment overwrites its argument, so hand it a copy.
            matched_indices = greedy_assignment(dist.copy())

        matched_det_ids = set(matched_indices[:, 0].tolist())
        matched_trk_ids = set(matched_indices[:, 1].tolist())
        unmatched_dets = [d for d in range(len(results))
                          if d not in matched_det_ids]
        unmatched_tracks = [t for t in range(len(self.tracks))
                            if t not in matched_trk_ids]

        if opt.hungarian:
            # The Hungarian solver may pair gated-out cells; discard those.
            matches = []
            for m in matched_indices:
                if dist[m[0], m[1]] > 1e16:
                    unmatched_dets.append(m[0])
                    unmatched_tracks.append(m[1])
                else:
                    matches.append(m)
            matches = np.array(matches).reshape(-1, 2)
        else:
            matches = matched_indices

        ret = []
        for m in matches:
            track = results[m[0]]
            self._inherit_identity(track, self.tracks[m[1]])
            ret.append(track)

        if opt.public_det and len(unmatched_dets) > 0:
            # Public detection: only create tracks from provided detections
            pub_dets = np.array([d['ct'] for d in public_det], np.float32)
            dist3 = ((dets.reshape(-1, 1, 2) -
                      pub_dets.reshape(1, -1, 2)) ** 2).sum(axis=2)
            still_free = set(int(d) for d in unmatched_dets)
            matched_dets = [d for d in range(dets.shape[0])
                            if d not in still_free]
            dist3[matched_dets] = 1e18
            for j in range(len(pub_dets)):
                i = dist3[:, j].argmin()
                if dist3[i, j] < item_size[i]:
                    # A detection may be claimed by one public box only.
                    dist3[i, :] = 1e18
                    track = results[i]
                    if track['score'] > opt.new_thresh:
                        self._assign_new_id(track)
                        ret.append(track)
        else:
            # Private detection: create tracks for all un-matched detections
            for i in unmatched_dets:
                track = results[i]
                if track['score'] > opt.new_thresh:
                    if 'embedding' in track:
                        max_id, max_cos = self.get_similarity(
                            track['embedding'], False, track['class'])
                        if (max_cos >= 0.3 and
                                self.tracklet_ages[max_id - 1] < opt.window_size):
                            # Similar enough to a stored id that was seen
                            # within the window: revive that id.
                            track['tracking_id'] = max_id
                            track['age'] = 1
                            track['active'] = 1
                            self._blend_embedding(max_id, track['embedding'])
                        else:
                            self._assign_new_id(track)
                            self.embedding_bank[self.id_count - 1, :] = \
                                track['embedding']
                            self.cat_bank[self.id_count - 1] = track['class']
                        self.alive.append(track['tracking_id'])
                        ret.append(track)
                    else:
                        self._assign_new_id(track)
                        ret.append(track)

        # Age every issued id, then reset the ids collected in ``ret`` so far.
        # NOTE(review): ids matched in the second association or carried over
        # below are appended later and therefore keep the incremented age.
        self.tracklet_ages[:self.id_count] += 1
        for track in ret:
            self.tracklet_ages[track['tracking_id'] - 1] = 1

        # ---- second association -----------------------------------------
        results_second = [item for item in results_with_low
                          if item['score'] < opt.track_thresh]
        second2original = [i for i in unmatched_tracks
                           if self.tracks[i]['active'] > 0]
        self_tracks_second = [self.tracks[i] for i in second2original]

        if results_second and self_tracks_second:
            dist, _, _ = self._association_cost(
                results_second, self_tracks_second,
                iou_thresh=0.3, match_class=True)

            matched_indices_second = greedy_assignment(dist.copy(), 1e8)
            matched_second = set(matched_indices_second[:, 1].tolist())
            unmatched_tracks_second = [
                d for d in range(len(self_tracks_second))
                if d not in matched_second]

            for m in matched_indices_second:
                track = results_second[m[0]]
                self._inherit_identity(track, self_tracks_second[m[1]])
                ret.append(track)

            unmatched_tracks = \
                [second2original[i] for i in unmatched_tracks_second] + \
                [i for i in unmatched_tracks if self.tracks[i]['active'] == 0]

        # Never used
        for i in unmatched_tracks:
            track = self.tracks[i]
            if track['age'] < opt.max_age:
                track['age'] += 1
                track['active'] = 1  # 0
                bbox = track['bbox']
                ct = track['ct']
                # Zero velocity: the box and centre are copied unchanged.
                v = [0, 0]
                track['bbox'] = [
                    bbox[0] + v[0], bbox[1] + v[1],
                    bbox[2] + v[0], bbox[3] + v[1]]
                track['ct'] = [ct[0] + v[0], ct[1] + v[1]]
                ret.append(track)

        for r_ in ret:
            # ``'embedding'`` is optional, and carried-over tracks had it
            # removed on an earlier step; a plain ``del`` raised KeyError.
            r_.pop('embedding', None)
        self.tracks = ret
        return ret

    def _association_cost(self, detections, tracks, iou_thresh, match_class):
        """Build the gated detection-to-track cost matrix.

        The cost is the squared distance between ``det['ct'] +
        det['tracking']`` and ``track['ct']``.  A pair is gated out (cost
        raised by 1e18) when that distance exceeds the box area of either
        side, when the IoU is below ``iou_thresh`` or, if ``match_class`` is
        true, when the classes differ.

        :return: ``(cost, det_centres, det_areas)``; ``cost`` has one row per
            detection and one column per track
        """
        num_det = len(detections)
        num_trk = len(tracks)

        track_boxes = np.array(
            [[t['bbox'][0], t['bbox'][1], t['bbox'][2], t['bbox'][3]]
             for t in tracks], np.float32)  # M x 4
        det_boxes = np.array(
            [[d['bbox'][0], d['bbox'][1], d['bbox'][2], d['bbox'][3]]
             for d in detections], np.float32)  # N x 4
        box_ious = self.bbox_overlaps_py(det_boxes, track_boxes)

        # Assumes 'ct' and 'tracking' add element-wise (e.g. numpy arrays) so
        # each detection contributes two values -- TODO confirm with caller.
        det_centres = np.array(
            [d['ct'] + d['tracking'] for d in detections], np.float32)  # N x 2
        track_centres = np.array(
            [t['ct'] for t in tracks], np.float32)  # M x 2

        track_size = np.array(
            [(t['bbox'][2] - t['bbox'][0]) * (t['bbox'][3] - t['bbox'][1])
             for t in tracks], np.float32)  # M
        det_size = np.array(
            [(d['bbox'][2] - d['bbox'][0]) * (d['bbox'][3] - d['bbox'][1])
             for d in detections], np.float32)  # N

        dist = ((track_centres.reshape(1, -1, 2) -
                 det_centres.reshape(-1, 1, 2)) ** 2).sum(axis=2)  # N x M

        invalid = ((dist > track_size.reshape(1, num_trk)) |
                   (dist > det_size.reshape(num_det, 1)) |
                   (box_ious < iou_thresh))
        if match_class:
            track_cat = np.array([t['class'] for t in tracks], np.int32)
            det_cat = np.array([d['class'] for d in detections], np.int32)
            invalid |= (det_cat.reshape(num_det, 1) !=
                        track_cat.reshape(1, num_trk))

        return dist + invalid * 1e18, det_centres, det_size

    def _assign_new_id(self, track):
        """Give ``track`` the next unused tracking id and reset its counters."""
        self.id_count += 1
        track['tracking_id'] = self.id_count
        track['age'] = 1
        track['active'] = 1

    def _blend_embedding(self, tracking_id, embedding):
        """Fold ``embedding`` into the stored one with weight ``self.alpha``."""
        row = tracking_id - 1
        self.embedding_bank[row, :] = self.alpha * embedding \
            + (1 - self.alpha) * self.embedding_bank[row, :]

    def _inherit_identity(self, track, previous):
        """Continue ``previous`` with the detection ``track`` (mutated in place)."""
        track['tracking_id'] = previous['tracking_id']
        track['age'] = 1
        track['active'] = previous['active'] + 1
        if 'embedding' in track:
            self.alive.append(track['tracking_id'])
            self._blend_embedding(track['tracking_id'], track['embedding'])
            self.cat_bank[track['tracking_id'] - 1] = track['class']

    def get_similarity(self, feat, stat, cls):
        """Find the stored id whose embedding is most similar to ``feat``.

        Ids listed in ``self.alive``, the most recently issued id and ids
        whose stored category differs from ``cls`` are excluded by forcing
        their similarity to -2.

        :param feat: 1-D embedding vector
        :param stat: unused; kept so existing callers keep working
        :param cls: category the candidate id must have
        :return: ``(max_id, max_cos)``; ``(-1, -1)`` if no id was issued yet
        """
        max_id = -1
        max_cos = -1
        nID = self.id_count
        a = feat[None, :]
        b = self.embedding_bank[:nID, :]
        if len(b) > 0:
            alive = np.array(self.alive, dtype=int) - 1
            cosim = cosine(a, b).reshape(-1)
            cosim[alive] = -2
            cosim[nID - 1] = -2
            cosim[np.where(self.cat_bank[:nID] != cls)[0]] = -2
            max_id = int(np.argmax(cosim) + 1)
            max_cos = np.max(cosim)
        return max_id, max_cos

    def bbox_overlaps_py(self, boxes, query_boxes):
        """
        determine overlaps between boxes and query_boxes

        Widths and heights are computed as ``x2 - x1 + 1``.  Pairs without a
        positive intersection keep an overlap of 0.

        :param boxes: n * 4 bounding boxes
        :param query_boxes: k * 4 bounding boxes
        :return: overlaps: n * k overlaps
        """
        n_ = boxes.shape[0]
        k_ = query_boxes.shape[0]
        # Builtin ``float`` instead of the ``np.float`` alias removed in
        # NumPy 1.24.
        overlaps = np.zeros((n_, k_), dtype=float)
        for k in range(k_):
            query = query_boxes[k]
            query_box_area = (query[2] - query[0] + 1) * \
                (query[3] - query[1] + 1)
            for n in range(n_):
                box = boxes[n]
                iw = min(box[2], query[2]) - max(box[0], query[0]) + 1
                if iw > 0:
                    ih = min(box[3], query[3]) - max(box[1], query[1]) + 1
                    if ih > 0:
                        box_area = (box[2] - box[0] + 1) * \
                            (box[3] - box[1] + 1)
                        all_area = float(
                            box_area + query_box_area - iw * ih)
                        overlaps[n, k] = iw * ih / all_area
        return overlaps
def greedy_assignment(dist, thresh=1e16):
    """Greedily pair rows with columns of a cost matrix, row by row.

    Each row takes its cheapest column if that cost is below ``thresh``; the
    chosen column is then overwritten with 1e18 so later rows cannot claim
    it.  ``dist`` is modified in place, so pass a copy when the original
    costs are still needed.

    :param dist: 2-D cost matrix (rows x columns)
    :param thresh: costs at or above this value are never matched
    :return: int32 array of shape (num_matches, 2) holding [row, column]
    """
    num_rows, num_cols = dist.shape[0], dist.shape[1]
    pairs = []
    if num_cols > 0:
        for row in range(num_rows):
            col = dist[row].argmin()
            if dist[row][col] < thresh:
                # Block this column for all remaining rows.
                dist[:, col] = 1e18
                pairs.append([row, col])
    return np.array(pairs, np.int32).reshape(-1, 2)
|