cnstream_frame_va.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. /*************************************************************************
  2. * Copyright (C) [2019] by Cambricon, Inc. All rights reserved
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  13. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  15. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  18. * THE SOFTWARE.
  19. *************************************************************************/
  20. #include "cnstream_frame_va.hpp"
  21. #include <cnrt.h>
  22. #include <libyuv.h>
  23. #include <fcntl.h>
  24. #include <stdlib.h>
  25. #include <sys/mman.h>
  26. #include <sys/stat.h>
  27. #include <sys/types.h>
  28. #include <unistd.h>
  29. #include <cmath>
  30. #include <cstring>
  31. #include <map>
  32. #include <memory>
  33. #include <mutex>
  34. #include <string>
  35. #include <utility>
  36. #include <vector>
  37. #include "cnstream_logging.hpp"
  38. #include "cnstream_module.hpp"
  39. namespace cnstream {
  40. namespace color_cvt {
  41. static
  42. cv::Mat BGRToBGR(const CNDataFrame& frame) {
  43. const cv::Mat bgr(frame.height, frame.stride[0], CV_8UC3, const_cast<void*>(frame.data[0]->GetCpuData()));
  44. return bgr(cv::Rect(0, 0, frame.width, frame.height)).clone();
  45. }
  46. static
  47. cv::Mat RGBToBGR(const CNDataFrame& frame) {
  48. const cv::Mat rgb(frame.height, frame.stride[0], CV_8UC3, const_cast<void*>(frame.data[0]->GetCpuData()));
  49. cv::Mat bgr;
  50. cv::cvtColor(rgb, bgr, cv::COLOR_RGB2BGR);
  51. return bgr(cv::Rect(0, 0, frame.width, frame.height)).clone();
  52. }
  53. static
  54. cv::Mat YUV420SPToBGR(const CNDataFrame& frame, bool nv21) {
  55. const uint8_t* y_plane = reinterpret_cast<const uint8_t*>(frame.data[0]->GetCpuData());
  56. const uint8_t* uv_plane = reinterpret_cast<const uint8_t*>(frame.data[1]->GetCpuData());
  57. int width = frame.width;
  58. int height = frame.height;
  59. if (width <= 0 || height <= 1) {
  60. LOGF(FRAME) << "Invalid width or height, width = " << width << ", height = " << height;
  61. }
  62. height = height & (~static_cast<int>(1));
  63. int y_stride = frame.stride[0];
  64. int uv_stride = frame.stride[1];
  65. cv::Mat bgr(height, width, CV_8UC3);
  66. uint8_t* dst_bgr24 = bgr.data;
  67. int dst_stride = width * 3;
  68. // kYvuH709Constants make it to BGR
  69. if (nv21)
  70. libyuv::NV21ToRGB24Matrix(y_plane, y_stride, uv_plane, uv_stride,
  71. dst_bgr24, dst_stride, &libyuv::kYvuH709Constants, width, height);
  72. else
  73. libyuv::NV12ToRGB24Matrix(y_plane, y_stride, uv_plane, uv_stride,
  74. dst_bgr24, dst_stride, &libyuv::kYvuH709Constants, width, height);
  75. return bgr;
  76. }
  77. static inline
  78. cv::Mat NV12ToBGR(const CNDataFrame& frame) {
  79. return YUV420SPToBGR(frame, false);
  80. }
  81. static inline
  82. cv::Mat NV21ToBGR(const CNDataFrame& frame) {
  83. return YUV420SPToBGR(frame, true);
  84. }
  85. static inline
  86. cv::Mat FrameToImageBGR(const CNDataFrame& frame) {
  87. switch (frame.fmt) {
  88. case CNDataFormat::CN_PIXEL_FORMAT_BGR24:
  89. return BGRToBGR(frame);
  90. case CNDataFormat::CN_PIXEL_FORMAT_RGB24:
  91. return RGBToBGR(frame);
  92. case CNDataFormat::CN_PIXEL_FORMAT_YUV420_NV12:
  93. return NV12ToBGR(frame);
  94. case CNDataFormat::CN_PIXEL_FORMAT_YUV420_NV21:
  95. return NV21ToBGR(frame);
  96. default:
  97. LOGF(FRAME) << "Unsupported pixel format. fmt[" << static_cast<int>(frame.fmt) << "]";
  98. }
  99. // never be here
  100. return cv::Mat();
  101. }
  102. } // namespace color_cvt
  103. cv::Mat CNDataFrame::ImageBGR() {
  104. std::lock_guard<std::mutex> lk(mtx);
  105. if (!bgr_mat.empty()) {
  106. return bgr_mat;
  107. }
  108. bgr_mat = color_cvt::FrameToImageBGR(*this);
  109. return bgr_mat;
  110. }
  111. cv::Mat CNDataFrame::ImageBGR_NO_OSD(){
  112. std::lock_guard<std::mutex> lk(mtx);
  113. if (!bgr_mat_ON.empty()){
  114. return bgr_mat_ON;
  115. }
  116. bgr_mat_ON = color_cvt::FrameToImageBGR(*this);
  117. return bgr_mat_ON;
  118. }
  119. size_t CNDataFrame::GetPlaneBytes(int plane_idx) const {
  120. if (plane_idx < 0 || plane_idx >= GetPlanes()) return 0;
  121. switch (fmt) {
  122. case CNDataFormat::CN_PIXEL_FORMAT_BGR24:
  123. case CNDataFormat::CN_PIXEL_FORMAT_RGB24:
  124. return height * stride[0] * 3;
  125. case CNDataFormat::CN_PIXEL_FORMAT_YUV420_NV12:
  126. case CNDataFormat::CN_PIXEL_FORMAT_YUV420_NV21:
  127. if (0 == plane_idx)
  128. return height * stride[0];
  129. else if (1 == plane_idx)
  130. return std::ceil(1.0 * height * stride[1] / 2);
  131. else
  132. LOGF(FRAME) << "plane index wrong.";
  133. default:
  134. return 0;
  135. }
  136. return 0;
  137. }
  138. size_t CNDataFrame::GetBytes() const {
  139. size_t bytes = 0;
  140. for (int i = 0; i < GetPlanes(); ++i) {
  141. bytes += GetPlaneBytes(i);
  142. }
  143. return bytes;
  144. }
  145. void CNDataFrame::CopyToSyncMem(void** ptr_src, bool dst_mlu) {
  146. if (this->deAllocator_ != nullptr) {
  147. /*cndecoder buffer will be used to avoid dev2dev copy*/
  148. if (dst_mlu) {
  149. for (int i = 0; i < GetPlanes(); i++) {
  150. size_t plane_size = GetPlaneBytes(i);
  151. this->data[i].reset(new (std::nothrow) CNSyncedMemory(plane_size, ctx.dev_id, ctx.ddr_channel));
  152. this->data[i]->SetMluData(ptr_src[i]);
  153. }
  154. return;
  155. }
  156. }
  157. /*deep copy*/
  158. if (this->ctx.dev_type == DevContext::DevType::MLU || this->ctx.dev_type == DevContext::DevType::CPU) {
  159. bool src_mlu = (this->ctx.dev_type == DevContext::DevType::MLU);
  160. size_t bytes = GetBytes();
  161. bytes = ROUND_UP(bytes, 64 * 1024);
  162. if (dst_mlu) {
  163. if (dst_device_id < 0 || (ctx.dev_type == DevContext::DevType::MLU && ctx.dev_id != dst_device_id)) {
  164. LOGF(FRAME) << "CopyToSyncMem: dst_device_id not set, or ctx.dev_id != dst_device_id"
  165. << "," << dst_device_id;
  166. std::terminate();
  167. return;
  168. }
  169. mlu_data = cnMluMemAlloc(bytes, dst_device_id);
  170. if (nullptr == mlu_data) {
  171. LOGF(FRAME) << "CopyToSyncMem: failed to alloc mlu memory";
  172. std::terminate();
  173. }
  174. } else {
  175. cpu_data = cnCpuMemAlloc(bytes);
  176. if (nullptr == cpu_data) {
  177. LOGF(FRAME) << "CopyToSyncMem: failed to alloc cpu memory";
  178. std::terminate();
  179. }
  180. }
  181. if (src_mlu && dst_mlu) {
  182. void* dst = mlu_data.get();
  183. for (int i = 0; i < GetPlanes(); i++) {
  184. size_t plane_size = GetPlaneBytes(i);
  185. MluDeviceGuard guard(dst_device_id); // dst_device_id is equal to ctx.devi_id
  186. cnrtRet_t ret = cnrtMemcpy(dst, ptr_src[i], plane_size, CNRT_MEM_TRANS_DIR_DEV2DEV);
  187. if (ret != CNRT_RET_SUCCESS) {
  188. LOGF(FRAME) << "CopyToSyncMem: failed to cnrtMemcpy(CNRT_MEM_TRANS_DIR_DEV2DEV)";
  189. }
  190. this->data[i].reset(new (std::nothrow) CNSyncedMemory(plane_size, dst_device_id));
  191. this->data[i]->SetMluData(dst);
  192. dst = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(dst) + plane_size);
  193. }
  194. } else if (src_mlu && !dst_mlu) {
  195. void* dst = cpu_data.get();
  196. for (int i = 0; i < GetPlanes(); i++) {
  197. size_t plane_size = GetPlaneBytes(i);
  198. MluDeviceGuard guard(ctx.dev_id);
  199. cnrtRet_t ret = cnrtMemcpy(dst, ptr_src[i], plane_size, CNRT_MEM_TRANS_DIR_DEV2HOST);
  200. if (ret != CNRT_RET_SUCCESS) {
  201. LOGF(FRAME) << "CopyToSyncMem: failed to cnrtMemcpy(CNRT_MEM_TRANS_DIR_DEV2HOST)";
  202. }
  203. this->data[i].reset(new (std::nothrow) CNSyncedMemory(plane_size, dst_device_id));
  204. this->data[i]->SetCpuData(dst);
  205. dst = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(dst) + plane_size);
  206. }
  207. } else if (!src_mlu && dst_mlu) {
  208. void* dst = mlu_data.get();
  209. for (int i = 0; i < GetPlanes(); i++) {
  210. size_t plane_size = GetPlaneBytes(i);
  211. MluDeviceGuard guard(dst_device_id);
  212. cnrtRet_t ret = cnrtMemcpy(dst, ptr_src[i], plane_size, CNRT_MEM_TRANS_DIR_HOST2DEV);
  213. if (ret != CNRT_RET_SUCCESS) {
  214. LOGF(FRAME) << "CopyToSyncMem: failed to cnrtMemcpy(CNRT_MEM_TRANS_DIR_HOST2DEV)";
  215. }
  216. this->data[i].reset(new (std::nothrow) CNSyncedMemory(plane_size, dst_device_id));
  217. this->data[i]->SetMluData(dst);
  218. dst = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(dst) + plane_size);
  219. }
  220. } else {
  221. void* dst = cpu_data.get();
  222. for (int i = 0; i < GetPlanes(); i++) {
  223. size_t plane_size = GetPlaneBytes(i);
  224. memcpy(dst, ptr_src[i], plane_size);
  225. this->data[i].reset(new (std::nothrow) CNSyncedMemory(plane_size, dst_device_id));
  226. this->data[i]->SetCpuData(dst);
  227. dst = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(dst) + plane_size);
  228. }
  229. }
  230. this->deAllocator_.reset(); // deep-copy is done, release dec-buf-ref
  231. } else {
  232. LOGF(FRAME) << "CopyToSyncMem: Unsupported type";
  233. std::terminate();
  234. }
  235. }
  236. void CNDataFrame::CopyToSyncMemOnDevice(int device_id) {
  237. // only support mlu memory sync between different devices
  238. if (this->ctx.dev_id != device_id && this->ctx.dev_type == DevContext::DevType::MLU) {
  239. unsigned int can_peer = 0;
  240. CALL_CNRT_BY_CONTEXT(cnrtGetPeerAccessibility(&can_peer, device_id, this->ctx.dev_id), this->ctx.dev_id,
  241. this->ctx.ddr_channel);
  242. if (1 != can_peer) {
  243. LOGF(FRAME) << "dst device: " << device_id << " is not peerable to src device: " << this->ctx.dev_id;
  244. }
  245. // malloc memory on device_id
  246. std::shared_ptr<void> peerdev_data = nullptr;
  247. size_t bytes = GetBytes();
  248. bytes = ROUND_UP(bytes, 64 * 1024);
  249. peerdev_data = cnMluMemAlloc(bytes, device_id);
  250. if (nullptr == peerdev_data) {
  251. LOGF(FRAME) << "CopyToSyncMemOnDevice: failed to alloc mlu memory";
  252. }
  253. // copy data to mlu memory on device_id
  254. if (deAllocator_ != nullptr) {
  255. mlu_data = peerdev_data;
  256. void* dst = mlu_data.get();
  257. for (int i = 0; i < GetPlanes(); i++) {
  258. size_t plane_size = GetPlaneBytes(i);
  259. CNS_CNRT_CHECK(cnrtMemcpy(dst, data[i]->GetMutableMluData(), plane_size, CNRT_MEM_TRANS_DIR_PEER2PEER));
  260. this->data[i].reset(new (std::nothrow) CNSyncedMemory(plane_size, device_id, ctx.ddr_channel));
  261. this->data[i]->SetMluData(dst);
  262. dst = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(dst) + plane_size);
  263. }
  264. } else if (nullptr != mlu_data) {
  265. CNS_CNRT_CHECK(cnrtMemcpy(peerdev_data.get(), mlu_data.get(), bytes, CNRT_MEM_TRANS_DIR_PEER2PEER));
  266. mlu_data = peerdev_data;
  267. void* dst = mlu_data.get();
  268. for (int i = 0; i < GetPlanes(); i++) {
  269. size_t plane_size = GetPlaneBytes(i);
  270. this->data[i].reset(new (std::nothrow) CNSyncedMemory(plane_size, device_id, ctx.ddr_channel));
  271. this->data[i]->SetMluData(dst);
  272. dst = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(dst) + plane_size);
  273. }
  274. } else {
  275. LOGF(FRAME) << "invalid mlu data.";
  276. }
  277. } else {
  278. LOGF(FRAME) << "only support mlu memory sync between different devices.";
  279. }
  280. // reset ctx.dev_id to device_id (for SOURCE data)
  281. this->ctx.dev_id = device_id;
  282. // reset dst_device_id to device id (for CNSyncedMemory data)
  283. dst_device_id = device_id;
  284. }
  285. bool CNInferObject::AddAttribute(const std::string& key, const CNInferAttr& value) {
  286. std::lock_guard<std::mutex> lk(attribute_mutex_);
  287. if (attributes_.find(key) != attributes_.end()) return false;
  288. attributes_.insert(std::make_pair(key, value));
  289. return true;
  290. }
  291. bool CNInferObject::AddAttribute(const std::pair<std::string, CNInferAttr>& attribute) {
  292. std::lock_guard<std::mutex> lk(attribute_mutex_);
  293. if (attributes_.find(attribute.first) != attributes_.end()) return false;
  294. attributes_.insert(attribute);
  295. return true;
  296. }
  297. CNInferAttr CNInferObject::GetAttribute(const std::string& key) {
  298. std::lock_guard<std::mutex> lk(attribute_mutex_);
  299. if (attributes_.find(key) != attributes_.end()) return attributes_[key];
  300. return CNInferAttr();
  301. }
  302. bool CNInferObject::AddExtraAttribute(const std::string& key, const std::string& value) {
  303. std::lock_guard<std::mutex> lk(attribute_mutex_);
  304. if (extra_attributes_.find(key) != extra_attributes_.end()) return false;
  305. extra_attributes_.insert(std::make_pair(key, value));
  306. return true;
  307. }
  308. bool CNInferObject::AddExtraAttributes(const std::vector<std::pair<std::string, std::string>>& attributes) {
  309. std::lock_guard<std::mutex> lk(attribute_mutex_);
  310. bool ret = true;
  311. for (auto& attribute : attributes) {
  312. ret &= AddExtraAttribute(attribute.first, attribute.second);
  313. }
  314. return ret;
  315. }
  316. std::string CNInferObject::GetExtraAttribute(const std::string& key) {
  317. std::lock_guard<std::mutex> lk(attribute_mutex_);
  318. if (extra_attributes_.find(key) != extra_attributes_.end()) {
  319. return extra_attributes_[key];
  320. }
  321. return "";
  322. }
  323. bool CNInferObject::RemoveExtraAttribute(const std::string& key) {
  324. std::lock_guard<std::mutex> lk(attribute_mutex_);
  325. if (extra_attributes_.find(key) != extra_attributes_.end()) {
  326. extra_attributes_.erase(key);
  327. }
  328. return true;
  329. }
  330. StringPairs CNInferObject::GetExtraAttributes() {
  331. std::lock_guard<std::mutex> lk(attribute_mutex_);
  332. return StringPairs(extra_attributes_.begin(), extra_attributes_.end());
  333. }
  334. bool CNInferObject::AddFeature(const std::string& key, const CNInferFeature& feature) {
  335. std::lock_guard<std::mutex> lk(feature_mutex_);
  336. if (features_.find(key) != features_.end()) {
  337. return false;
  338. }
  339. features_.insert(std::make_pair(key, feature));
  340. return true;
  341. }
  342. CNInferFeature CNInferObject::GetFeature(const std::string& key) {
  343. std::lock_guard<std::mutex> lk(feature_mutex_);
  344. if (features_.find(key) != features_.end()) {
  345. return features_[key];
  346. }
  347. return CNInferFeature();
  348. }
  349. CNInferFeatures CNInferObject::GetFeatures() {
  350. std::lock_guard<std::mutex> lk(feature_mutex_);
  351. return CNInferFeatures(features_.begin(), features_.end());
  352. }
  353. } // namespace cnstream