123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- /*************************************************************************
- * Copyright (C) [2019] by Cambricon, Inc. All rights reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *************************************************************************/
#include <gflags/gflags.h>

#include <atomic>
#include <chrono>
#include <cstdint>
#include <future>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "device/mlu_context.h"
#include "easyinfer/easy_infer.h"
#include "easyinfer/mlu_memory_op.h"
#include "easyinfer/model_loader.h"
- DEFINE_string(offline_model, "", "path of offline-model");
- DEFINE_string(function_name, "subnet0", "model defined function name");
- DEFINE_int32(th_num, 1, "thread number");
- DEFINE_int32(iterations, 1, "invoke time per thread");
- DEFINE_int32(dev_id, 0, "device id");
// Start flag shared between main() and the worker threads: workers poll it
// and begin their timed loop once main() sets it to true.
// It is std::atomic rather than `volatile` because `volatile` gives no
// inter-thread visibility or ordering guarantee in C++; concurrent access to a
// plain/volatile bool from several threads is a data race.
static std::atomic<bool> g_run{false};
- std::pair<double, double> ThreadFunc() {
- edk::MluContext ctx;
- ctx.SetDeviceId(FLAGS_dev_id);
- ctx.BindDevice();
- auto msptr = std::make_shared<edk::ModelLoader>(FLAGS_offline_model, FLAGS_function_name);
- edk::EasyInfer infer;
- infer.Init(msptr, FLAGS_dev_id);
- edk::MluMemoryOp mem_op;
- mem_op.SetModel(msptr);
- void** input = mem_op.AllocMluInput();
- void** output = mem_op.AllocMluOutput();
- while (!g_run) {
- std::this_thread::sleep_for(std::chrono::milliseconds(1));
- }
- double sw_total_time = 0, hw_total_time = 0;
- int run_num = FLAGS_iterations;
- while (run_num--) {
- float hw_time = 0;
- auto stime = std::chrono::steady_clock::now();
- infer.Run(input, output, &hw_time);
- auto etime = std::chrono::steady_clock::now();
- std::chrono::duration<double, std::milli> diff = etime - stime;
- sw_total_time += diff.count();
- hw_total_time += hw_time;
- }
- mem_op.FreeMluInput(input);
- mem_op.FreeMluOutput(output);
- if (FLAGS_iterations) {
- return std::make_pair(sw_total_time / FLAGS_iterations, hw_total_time / FLAGS_iterations);
- }
- return std::make_pair(0.0, 0.0);
- }
- int main(int argc, char *argv[]) {
- ::gflags::ParseCommandLineFlags(&argc, &argv, true);
- if (FLAGS_offline_model.size() == 0) {
- std::cout << "offline model size is 0\n";
- return 0;
- }
- if (FLAGS_function_name.size() == 0) {
- std::cout << "function name size is 0\n";
- return 0;
- }
- if (FLAGS_th_num <= 0) {
- std::cout << "thread number <= 0\n";
- return 0;
- }
- if (FLAGS_iterations <= 0) {
- std::cout << "invoke time per thread <= 0\n";
- return 0;
- }
- int batchsize = 0;
- {
- edk::ModelLoader model(FLAGS_offline_model, FLAGS_function_name);
- std::cout << "----------------------input num: " << model.InputNum() << '\n';
- for (uint32_t i = 0; i < model.InputNum(); ++i) {
- std::cout << "model input shape " << i << ": " << model.InputShape(i) << std::endl;
- }
- std::cout << "---------------------output num: " << model.OutputNum() << '\n';
- for (uint32_t i = 0; i < model.OutputNum(); ++i) {
- std::cout << "model output shape " << i << ": " << model.OutputShape(i) << std::endl;
- }
- batchsize = model.InputShape(0).N();
- }
- std::vector<std::future<std::pair<double, double>>> th_perf_infos;
- for (int thi = 0; thi < FLAGS_th_num; ++thi) {
- th_perf_infos.push_back(std::async(std::launch::async, &::ThreadFunc));
- }
- std::this_thread::sleep_for(std::chrono::seconds(2));
- g_run = true;
- double total_sw_time = 0, total_hw_time = 0;
- for (auto& it : th_perf_infos) {
- auto time_pair = it.get();
- total_sw_time += time_pair.first;
- total_hw_time += time_pair.second;
- }
- std::cout << "Avg hardware time: " << total_hw_time / FLAGS_th_num << std::endl;
- std::cout << "Avg software time: " << total_sw_time / FLAGS_th_num << std::endl;
- std::cout << "Fps: " << 1000.0 * FLAGS_th_num * batchsize * FLAGS_th_num / total_sw_time << std::endl;
- return 0;
- }
|