cocoeval.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
  2. #include "cocoeval.h"
  3. #include <time.h>
  4. #include <algorithm>
  5. #include <cstdint>
  6. #include <numeric>
  7. using namespace pybind11::literals;
  8. namespace COCOeval {
  9. // Sort detections from highest score to lowest, such that
  10. // detection_instances[detection_sorted_indices[t]] >=
  11. // detection_instances[detection_sorted_indices[t+1]]. Use stable_sort to match
  12. // original COCO API
  13. void SortInstancesByDetectionScore(
  14. const std::vector<InstanceAnnotation>& detection_instances,
  15. std::vector<uint64_t>* detection_sorted_indices) {
  16. detection_sorted_indices->resize(detection_instances.size());
  17. std::iota(
  18. detection_sorted_indices->begin(), detection_sorted_indices->end(), 0);
  19. std::stable_sort(
  20. detection_sorted_indices->begin(),
  21. detection_sorted_indices->end(),
  22. [&detection_instances](size_t j1, size_t j2) {
  23. return detection_instances[j1].score > detection_instances[j2].score;
  24. });
  25. }
  26. // Partition the ground truth objects based on whether or not to ignore them
  27. // based on area
  28. void SortInstancesByIgnore(
  29. const std::array<double, 2>& area_range,
  30. const std::vector<InstanceAnnotation>& ground_truth_instances,
  31. std::vector<uint64_t>* ground_truth_sorted_indices,
  32. std::vector<bool>* ignores) {
  33. ignores->clear();
  34. ignores->reserve(ground_truth_instances.size());
  35. for (auto o : ground_truth_instances) {
  36. ignores->push_back(
  37. o.ignore || o.area < area_range[0] || o.area > area_range[1]);
  38. }
  39. ground_truth_sorted_indices->resize(ground_truth_instances.size());
  40. std::iota(
  41. ground_truth_sorted_indices->begin(),
  42. ground_truth_sorted_indices->end(),
  43. 0);
  44. std::stable_sort(
  45. ground_truth_sorted_indices->begin(),
  46. ground_truth_sorted_indices->end(),
  47. [&ignores](size_t j1, size_t j2) {
  48. return (int)(*ignores)[j1] < (int)(*ignores)[j2];
  49. });
  50. }
  51. // For each IOU threshold, greedily match each detected instance to a ground
  52. // truth instance (if possible) and store the results
  53. void MatchDetectionsToGroundTruth(
  54. const std::vector<InstanceAnnotation>& detection_instances,
  55. const std::vector<uint64_t>& detection_sorted_indices,
  56. const std::vector<InstanceAnnotation>& ground_truth_instances,
  57. const std::vector<uint64_t>& ground_truth_sorted_indices,
  58. const std::vector<bool>& ignores,
  59. const std::vector<std::vector<double>>& ious,
  60. const std::vector<double>& iou_thresholds,
  61. const std::array<double, 2>& area_range,
  62. ImageEvaluation* results) {
  63. // Initialize memory to store return data matches and ignore
  64. const int num_iou_thresholds = iou_thresholds.size();
  65. const int num_ground_truth = ground_truth_sorted_indices.size();
  66. const int num_detections = detection_sorted_indices.size();
  67. std::vector<uint64_t> ground_truth_matches(
  68. num_iou_thresholds * num_ground_truth, 0);
  69. std::vector<uint64_t>& detection_matches = results->detection_matches;
  70. std::vector<bool>& detection_ignores = results->detection_ignores;
  71. std::vector<bool>& ground_truth_ignores = results->ground_truth_ignores;
  72. detection_matches.resize(num_iou_thresholds * num_detections, 0);
  73. detection_ignores.resize(num_iou_thresholds * num_detections, false);
  74. ground_truth_ignores.resize(num_ground_truth);
  75. for (auto g = 0; g < num_ground_truth; ++g) {
  76. ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]];
  77. }
  78. for (auto t = 0; t < num_iou_thresholds; ++t) {
  79. for (auto d = 0; d < num_detections; ++d) {
  80. // information about best match so far (match=-1 -> unmatched)
  81. double best_iou = std::min(iou_thresholds[t], 1 - 1e-10);
  82. int match = -1;
  83. for (auto g = 0; g < num_ground_truth; ++g) {
  84. // if this ground truth instance is already matched and not a
  85. // crowd, it cannot be matched to another detection
  86. if (ground_truth_matches[t * num_ground_truth + g] > 0 &&
  87. !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) {
  88. continue;
  89. }
  90. // if detected instance matched to a regular ground truth
  91. // instance, we can break on the first ground truth instance
  92. // tagged as ignore (because they are sorted by the ignore tag)
  93. if (match >= 0 && !ground_truth_ignores[match] &&
  94. ground_truth_ignores[g]) {
  95. break;
  96. }
  97. // if IOU overlap is the best so far, store the match appropriately
  98. if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) {
  99. best_iou = ious[d][ground_truth_sorted_indices[g]];
  100. match = g;
  101. }
  102. }
  103. // if match was made, store id of match for both detection and
  104. // ground truth
  105. if (match >= 0) {
  106. detection_ignores[t * num_detections + d] = ground_truth_ignores[match];
  107. detection_matches[t * num_detections + d] =
  108. ground_truth_instances[ground_truth_sorted_indices[match]].id;
  109. ground_truth_matches[t * num_ground_truth + match] =
  110. detection_instances[detection_sorted_indices[d]].id;
  111. }
  112. // set unmatched detections outside of area range to ignore
  113. const InstanceAnnotation& detection =
  114. detection_instances[detection_sorted_indices[d]];
  115. detection_ignores[t * num_detections + d] =
  116. detection_ignores[t * num_detections + d] ||
  117. (detection_matches[t * num_detections + d] == 0 &&
  118. (detection.area < area_range[0] || detection.area > area_range[1]));
  119. }
  120. }
  121. // store detection score results
  122. results->detection_scores.resize(detection_sorted_indices.size());
  123. for (size_t d = 0; d < detection_sorted_indices.size(); ++d) {
  124. results->detection_scores[d] =
  125. detection_instances[detection_sorted_indices[d]].score;
  126. }
  127. }
  128. std::vector<ImageEvaluation> EvaluateImages(
  129. const std::vector<std::array<double, 2>>& area_ranges,
  130. int max_detections,
  131. const std::vector<double>& iou_thresholds,
  132. const ImageCategoryInstances<std::vector<double>>& image_category_ious,
  133. const ImageCategoryInstances<InstanceAnnotation>&
  134. image_category_ground_truth_instances,
  135. const ImageCategoryInstances<InstanceAnnotation>&
  136. image_category_detection_instances) {
  137. const int num_area_ranges = area_ranges.size();
  138. const int num_images = image_category_ground_truth_instances.size();
  139. const int num_categories =
  140. image_category_ious.size() > 0 ? image_category_ious[0].size() : 0;
  141. std::vector<uint64_t> detection_sorted_indices;
  142. std::vector<uint64_t> ground_truth_sorted_indices;
  143. std::vector<bool> ignores;
  144. std::vector<ImageEvaluation> results_all(
  145. num_images * num_area_ranges * num_categories);
  146. // Store results for each image, category, and area range combination. Results
  147. // for each IOU threshold are packed into the same ImageEvaluation object
  148. for (auto i = 0; i < num_images; ++i) {
  149. for (auto c = 0; c < num_categories; ++c) {
  150. const std::vector<InstanceAnnotation>& ground_truth_instances =
  151. image_category_ground_truth_instances[i][c];
  152. const std::vector<InstanceAnnotation>& detection_instances =
  153. image_category_detection_instances[i][c];
  154. SortInstancesByDetectionScore(
  155. detection_instances, &detection_sorted_indices);
  156. if ((int)detection_sorted_indices.size() > max_detections) {
  157. detection_sorted_indices.resize(max_detections);
  158. }
  159. for (size_t a = 0; a < area_ranges.size(); ++a) {
  160. SortInstancesByIgnore(
  161. area_ranges[a],
  162. ground_truth_instances,
  163. &ground_truth_sorted_indices,
  164. &ignores);
  165. MatchDetectionsToGroundTruth(
  166. detection_instances,
  167. detection_sorted_indices,
  168. ground_truth_instances,
  169. ground_truth_sorted_indices,
  170. ignores,
  171. image_category_ious[i][c],
  172. iou_thresholds,
  173. area_ranges[a],
  174. &results_all
  175. [c * num_area_ranges * num_images + a * num_images + i]);
  176. }
  177. }
  178. }
  179. return results_all;
  180. }
  181. // Convert a python list to a vector
  182. template <typename T>
  183. std::vector<T> list_to_vec(const py::list& l) {
  184. std::vector<T> v(py::len(l));
  185. for (int i = 0; i < (int)py::len(l); ++i) {
  186. v[i] = l[i].cast<T>();
  187. }
  188. return v;
  189. }
  190. // Helper function to Accumulate()
  191. // Considers the evaluation results applicable to a particular category, area
  192. // range, and max_detections parameter setting, which begin at
  193. // evaluations[evaluation_index]. Extracts a sorted list of length n of all
  194. // applicable detection instances concatenated across all images in the dataset,
  195. // which are represented by the outputs evaluation_indices, detection_scores,
  196. // image_detection_indices, and detection_sorted_indices--all of which are
  197. // length n. evaluation_indices[i] stores the applicable index into
  198. // evaluations[] for instance i, which has detection score detection_score[i],
  199. // and is the image_detection_indices[i]'th of the list of detections
  200. // for the image containing i. detection_sorted_indices[] defines a sorted
  201. // permutation of the 3 other outputs
  202. int BuildSortedDetectionList(
  203. const std::vector<ImageEvaluation>& evaluations,
  204. const int64_t evaluation_index,
  205. const int64_t num_images,
  206. const int max_detections,
  207. std::vector<uint64_t>* evaluation_indices,
  208. std::vector<double>* detection_scores,
  209. std::vector<uint64_t>* detection_sorted_indices,
  210. std::vector<uint64_t>* image_detection_indices) {
  211. assert(evaluations.size() >= evaluation_index + num_images);
  212. // Extract a list of object instances of the applicable category, area
  213. // range, and max detections requirements such that they can be sorted
  214. image_detection_indices->clear();
  215. evaluation_indices->clear();
  216. detection_scores->clear();
  217. image_detection_indices->reserve(num_images * max_detections);
  218. evaluation_indices->reserve(num_images * max_detections);
  219. detection_scores->reserve(num_images * max_detections);
  220. int num_valid_ground_truth = 0;
  221. for (auto i = 0; i < num_images; ++i) {
  222. const ImageEvaluation& evaluation = evaluations[evaluation_index + i];
  223. for (int d = 0;
  224. d < (int)evaluation.detection_scores.size() && d < max_detections;
  225. ++d) { // detected instances
  226. evaluation_indices->push_back(evaluation_index + i);
  227. image_detection_indices->push_back(d);
  228. detection_scores->push_back(evaluation.detection_scores[d]);
  229. }
  230. for (auto ground_truth_ignore : evaluation.ground_truth_ignores) {
  231. if (!ground_truth_ignore) {
  232. ++num_valid_ground_truth;
  233. }
  234. }
  235. }
  236. // Sort detections by decreasing score, using stable sort to match
  237. // python implementation
  238. detection_sorted_indices->resize(detection_scores->size());
  239. std::iota(
  240. detection_sorted_indices->begin(), detection_sorted_indices->end(), 0);
  241. std::stable_sort(
  242. detection_sorted_indices->begin(),
  243. detection_sorted_indices->end(),
  244. [&detection_scores](size_t j1, size_t j2) {
  245. return (*detection_scores)[j1] > (*detection_scores)[j2];
  246. });
  247. return num_valid_ground_truth;
  248. }
  249. // Helper function to Accumulate()
  250. // Compute a precision recall curve given a sorted list of detected instances
  251. // encoded in evaluations, evaluation_indices, detection_scores,
  252. // detection_sorted_indices, image_detection_indices (see
  253. // BuildSortedDetectionList()). Using vectors precisions and recalls
  254. // and temporary storage, output the results into precisions_out, recalls_out,
  255. // and scores_out, which are large buffers containing many precion/recall curves
  256. // for all possible parameter settings, with precisions_out_index and
  257. // recalls_out_index defining the applicable indices to store results.
  258. void ComputePrecisionRecallCurve(
  259. const int64_t precisions_out_index,
  260. const int64_t precisions_out_stride,
  261. const int64_t recalls_out_index,
  262. const std::vector<double>& recall_thresholds,
  263. const int iou_threshold_index,
  264. const int num_iou_thresholds,
  265. const int num_valid_ground_truth,
  266. const std::vector<ImageEvaluation>& evaluations,
  267. const std::vector<uint64_t>& evaluation_indices,
  268. const std::vector<double>& detection_scores,
  269. const std::vector<uint64_t>& detection_sorted_indices,
  270. const std::vector<uint64_t>& image_detection_indices,
  271. std::vector<double>* precisions,
  272. std::vector<double>* recalls,
  273. std::vector<double>* precisions_out,
  274. std::vector<double>* scores_out,
  275. std::vector<double>* recalls_out) {
  276. assert(recalls_out->size() > recalls_out_index);
  277. // Compute precision/recall for each instance in the sorted list of detections
  278. int64_t true_positives_sum = 0, false_positives_sum = 0;
  279. precisions->clear();
  280. recalls->clear();
  281. precisions->reserve(detection_sorted_indices.size());
  282. recalls->reserve(detection_sorted_indices.size());
  283. assert(!evaluations.empty() || detection_sorted_indices.empty());
  284. for (auto detection_sorted_index : detection_sorted_indices) {
  285. const ImageEvaluation& evaluation =
  286. evaluations[evaluation_indices[detection_sorted_index]];
  287. const auto num_detections =
  288. evaluation.detection_matches.size() / num_iou_thresholds;
  289. const auto detection_index = iou_threshold_index * num_detections +
  290. image_detection_indices[detection_sorted_index];
  291. assert(evaluation.detection_matches.size() > detection_index);
  292. assert(evaluation.detection_ignores.size() > detection_index);
  293. const int64_t detection_match =
  294. evaluation.detection_matches[detection_index];
  295. const bool detection_ignores =
  296. evaluation.detection_ignores[detection_index];
  297. const auto true_positive = detection_match > 0 && !detection_ignores;
  298. const auto false_positive = detection_match == 0 && !detection_ignores;
  299. if (true_positive) {
  300. ++true_positives_sum;
  301. }
  302. if (false_positive) {
  303. ++false_positives_sum;
  304. }
  305. const double recall =
  306. static_cast<double>(true_positives_sum) / num_valid_ground_truth;
  307. recalls->push_back(recall);
  308. const int64_t num_valid_detections =
  309. true_positives_sum + false_positives_sum;
  310. const double precision = num_valid_detections > 0
  311. ? static_cast<double>(true_positives_sum) / num_valid_detections
  312. : 0.0;
  313. precisions->push_back(precision);
  314. }
  315. (*recalls_out)[recalls_out_index] = !recalls->empty() ? recalls->back() : 0;
  316. for (int64_t i = static_cast<int64_t>(precisions->size()) - 1; i > 0; --i) {
  317. if ((*precisions)[i] > (*precisions)[i - 1]) {
  318. (*precisions)[i - 1] = (*precisions)[i];
  319. }
  320. }
  321. // Sample the per instance precision/recall list at each recall threshold
  322. for (size_t r = 0; r < recall_thresholds.size(); ++r) {
  323. // first index in recalls >= recall_thresholds[r]
  324. std::vector<double>::iterator low = std::lower_bound(
  325. recalls->begin(), recalls->end(), recall_thresholds[r]);
  326. size_t precisions_index = low - recalls->begin();
  327. const auto results_ind = precisions_out_index + r * precisions_out_stride;
  328. assert(results_ind < precisions_out->size());
  329. assert(results_ind < scores_out->size());
  330. if (precisions_index < precisions->size()) {
  331. (*precisions_out)[results_ind] = (*precisions)[precisions_index];
  332. (*scores_out)[results_ind] =
  333. detection_scores[detection_sorted_indices[precisions_index]];
  334. } else {
  335. (*precisions_out)[results_ind] = 0;
  336. (*scores_out)[results_ind] = 0;
  337. }
  338. }
  339. }
  340. py::dict Accumulate(
  341. const py::object& params,
  342. const std::vector<ImageEvaluation>& evaluations) {
  343. const std::vector<double> recall_thresholds =
  344. list_to_vec<double>(params.attr("recThrs"));
  345. const std::vector<int> max_detections =
  346. list_to_vec<int>(params.attr("maxDets"));
  347. const int num_iou_thresholds = py::len(params.attr("iouThrs"));
  348. const int num_recall_thresholds = py::len(params.attr("recThrs"));
  349. const int num_categories = params.attr("useCats").cast<int>() == 1
  350. ? py::len(params.attr("catIds"))
  351. : 1;
  352. const int num_area_ranges = py::len(params.attr("areaRng"));
  353. const int num_max_detections = py::len(params.attr("maxDets"));
  354. const int num_images = py::len(params.attr("imgIds"));
  355. std::vector<double> precisions_out(
  356. num_iou_thresholds * num_recall_thresholds * num_categories *
  357. num_area_ranges * num_max_detections,
  358. -1);
  359. std::vector<double> recalls_out(
  360. num_iou_thresholds * num_categories * num_area_ranges *
  361. num_max_detections,
  362. -1);
  363. std::vector<double> scores_out(
  364. num_iou_thresholds * num_recall_thresholds * num_categories *
  365. num_area_ranges * num_max_detections,
  366. -1);
  367. // Consider the list of all detected instances in the entire dataset in one
  368. // large list. evaluation_indices, detection_scores,
  369. // image_detection_indices, and detection_sorted_indices all have the same
  370. // length as this list, such that each entry corresponds to one detected
  371. // instance
  372. std::vector<uint64_t> evaluation_indices; // indices into evaluations[]
  373. std::vector<double> detection_scores; // detection scores of each instance
  374. std::vector<uint64_t> detection_sorted_indices; // sorted indices of all
  375. // instances in the dataset
  376. std::vector<uint64_t>
  377. image_detection_indices; // indices into the list of detected instances in
  378. // the same image as each instance
  379. std::vector<double> precisions, recalls;
  380. for (auto c = 0; c < num_categories; ++c) {
  381. for (auto a = 0; a < num_area_ranges; ++a) {
  382. for (auto m = 0; m < num_max_detections; ++m) {
  383. // The COCO PythonAPI assumes evaluations[] (the return value of
  384. // COCOeval::EvaluateImages() is one long list storing results for each
  385. // combination of category, area range, and image id, with categories in
  386. // the outermost loop and images in the innermost loop.
  387. const int64_t evaluations_index =
  388. c * num_area_ranges * num_images + a * num_images;
  389. int num_valid_ground_truth = BuildSortedDetectionList(
  390. evaluations,
  391. evaluations_index,
  392. num_images,
  393. max_detections[m],
  394. &evaluation_indices,
  395. &detection_scores,
  396. &detection_sorted_indices,
  397. &image_detection_indices);
  398. if (num_valid_ground_truth == 0) {
  399. continue;
  400. }
  401. for (auto t = 0; t < num_iou_thresholds; ++t) {
  402. // recalls_out is a flattened vectors representing a
  403. // num_iou_thresholds X num_categories X num_area_ranges X
  404. // num_max_detections matrix
  405. const int64_t recalls_out_index =
  406. t * num_categories * num_area_ranges * num_max_detections +
  407. c * num_area_ranges * num_max_detections +
  408. a * num_max_detections + m;
  409. // precisions_out and scores_out are flattened vectors
  410. // representing a num_iou_thresholds X num_recall_thresholds X
  411. // num_categories X num_area_ranges X num_max_detections matrix
  412. const int64_t precisions_out_stride =
  413. num_categories * num_area_ranges * num_max_detections;
  414. const int64_t precisions_out_index = t * num_recall_thresholds *
  415. num_categories * num_area_ranges * num_max_detections +
  416. c * num_area_ranges * num_max_detections +
  417. a * num_max_detections + m;
  418. ComputePrecisionRecallCurve(
  419. precisions_out_index,
  420. precisions_out_stride,
  421. recalls_out_index,
  422. recall_thresholds,
  423. t,
  424. num_iou_thresholds,
  425. num_valid_ground_truth,
  426. evaluations,
  427. evaluation_indices,
  428. detection_scores,
  429. detection_sorted_indices,
  430. image_detection_indices,
  431. &precisions,
  432. &recalls,
  433. &precisions_out,
  434. &scores_out,
  435. &recalls_out);
  436. }
  437. }
  438. }
  439. }
  440. time_t rawtime;
  441. struct tm local_time;
  442. std::array<char, 200> buffer;
  443. time(&rawtime);
  444. #ifdef _WIN32
  445. localtime_s(&local_time, &rawtime);
  446. #else
  447. localtime_r(&rawtime, &local_time);
  448. #endif
  449. strftime(
  450. buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time);
  451. return py::dict(
  452. "params"_a = params,
  453. "counts"_a = std::vector<int64_t>({num_iou_thresholds,
  454. num_recall_thresholds,
  455. num_categories,
  456. num_area_ranges,
  457. num_max_detections}),
  458. "date"_a = buffer,
  459. "precision"_a = precisions_out,
  460. "recall"_a = recalls_out,
  461. "scores"_a = scores_out);
  462. }
  463. } // namespace COCOeval