get_model_perfinfo.cpp

/*************************************************************************
 * Copyright (C) [2019] by Cambricon, Inc. All rights reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *************************************************************************/
#include <gflags/gflags.h>

#include <atomic>
#include <chrono>
#include <future>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "device/mlu_context.h"
#include "easyinfer/easy_infer.h"
#include "easyinfer/mlu_memory_op.h"
#include "easyinfer/model_loader.h"
DEFINE_string(offline_model, "", "path of the offline model");
DEFINE_string(function_name, "subnet0", "function name defined in the model");
DEFINE_int32(th_num, 1, "number of worker threads");
DEFINE_int32(iterations, 1, "number of inference iterations per thread");
DEFINE_int32(dev_id, 0, "device id");
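
// Example invocation (a sketch; the binary name and model file below are
// illustrative placeholders, not taken from this file):
//   ./get_model_perfinfo --offline_model model.cambricon --function_name subnet0 \
//     --th_num 4 --iterations 100 --dev_id 0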

// Start gate: worker threads spin on this flag so that they all begin the
// timed loop together, after every thread has finished its setup.
static std::atomic<bool> g_run{false};
std::pair<double, double> ThreadFunc() {
  // Bind this thread to the target MLU device.
  edk::MluContext ctx;
  ctx.SetDeviceId(FLAGS_dev_id);
  ctx.BindDevice();

  // Load the offline model and set up the inference and memory helpers.
  auto msptr = std::make_shared<edk::ModelLoader>(FLAGS_offline_model, FLAGS_function_name);
  edk::EasyInfer infer;
  infer.Init(msptr, FLAGS_dev_id);
  edk::MluMemoryOp mem_op;
  mem_op.SetModel(msptr);
  void** input = mem_op.AllocMluInput();
  void** output = mem_op.AllocMluOutput();

  // Wait for the main thread to release all workers at once, so that setup
  // cost is excluded from the measurement.
  while (!g_run) {
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
  }

  double sw_total_time = 0, hw_total_time = 0;
  int run_num = FLAGS_iterations;
  while (run_num--) {
    float hw_time = 0;
    auto stime = std::chrono::steady_clock::now();
    infer.Run(input, output, &hw_time);
    auto etime = std::chrono::steady_clock::now();
    std::chrono::duration<double, std::milli> diff = etime - stime;
    sw_total_time += diff.count();  // wall-clock (software) time, in ms
    hw_total_time += hw_time;       // device-reported (hardware) time, in ms
  }

  mem_op.FreeMluInput(input);
  mem_op.FreeMluOutput(output);

  // Return this thread's per-iteration averages as (software, hardware).
  if (FLAGS_iterations) {
    return std::make_pair(sw_total_time / FLAGS_iterations, hw_total_time / FLAGS_iterations);
  }
  return std::make_pair(0.0, 0.0);
}

int main(int argc, char* argv[]) {
  ::gflags::ParseCommandLineFlags(&argc, &argv, true);
  if (FLAGS_offline_model.empty()) {
    std::cout << "offline model path is empty\n";
    return 1;
  }
  if (FLAGS_function_name.empty()) {
    std::cout << "function name is empty\n";
    return 1;
  }
  if (FLAGS_th_num <= 0) {
    std::cout << "thread number must be greater than 0\n";
    return 1;
  }
  if (FLAGS_iterations <= 0) {
    std::cout << "iterations per thread must be greater than 0\n";
    return 1;
  }
  // Print the model's I/O shapes and read the batch size from input 0.
  int batchsize = 0;
  {
    edk::ModelLoader model(FLAGS_offline_model, FLAGS_function_name);
    std::cout << "----------------------input num: " << model.InputNum() << '\n';
    for (uint32_t i = 0; i < model.InputNum(); ++i) {
      std::cout << "model input shape " << i << ": " << model.InputShape(i) << std::endl;
    }
    std::cout << "---------------------output num: " << model.OutputNum() << '\n';
    for (uint32_t i = 0; i < model.OutputNum(); ++i) {
      std::cout << "model output shape " << i << ": " << model.OutputShape(i) << std::endl;
    }
    batchsize = model.InputShape(0).N();
  }
  // Launch the worker threads, give them time to finish setup, then release
  // them all at once by flipping the start gate.
  std::vector<std::future<std::pair<double, double>>> th_perf_infos;
  for (int thi = 0; thi < FLAGS_th_num; ++thi) {
    th_perf_infos.push_back(std::async(std::launch::async, &::ThreadFunc));
  }
  std::this_thread::sleep_for(std::chrono::seconds(2));
  g_run = true;

  // Sum the per-thread average times.
  double total_sw_time = 0, total_hw_time = 0;
  for (auto& it : th_perf_infos) {
    auto time_pair = it.get();
    total_sw_time += time_pair.first;
    total_hw_time += time_pair.second;
  }
  std::cout << "Avg hardware time: " << total_hw_time / FLAGS_th_num << std::endl;
  std::cout << "Avg software time: " << total_sw_time / FLAGS_th_num << std::endl;
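  // Throughput derivation: each future returns a per-thread *average* software
  // time per iteration, so total_sw_time is a sum of per-thread averages and
  // total_sw_time / th_num is the mean per-iteration latency of one thread.
  // Each iteration processes `batchsize` frames, hence
  //   per-thread fps = 1000 * batchsize / (total_sw_time / th_num)
  //   aggregate fps  = th_num * per-thread fps
  //                  = 1000 * batchsize * th_num * th_num / total_sw_time,
  // which is why FLAGS_th_num appears twice in the expression below.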
  std::cout << "Fps: " << 1000.0 * FLAGS_th_num * batchsize * FLAGS_th_num / total_sw_time << std::endl;
  return 0;
}