libyuv-1766.patch
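Adds copy and RGB-to-biplanar entry points to libyuv r1766: RGB24Copy (with RAWCopy, RAWToRAW and RGB24ToRGB24 aliases) for packed 24-bit frames, NV12Copy (with NV21Copy, NV12ToNV12 and NV21ToNV21 aliases) for biplanar frames, and the converters RGB24ToNV12, RGB24ToNV21, RAWToNV12 and RAWToNV21. All entry points follow the existing libyuv convention that a negative height inverts the image.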

diff -rNua libyuv-o/include/libyuv/convert_argb.h libyuv/include/libyuv/convert_argb.h
--- libyuv-o/include/libyuv/convert_argb.h 2020-10-23 15:16:00.638324198 +0800
+++ libyuv/include/libyuv/convert_argb.h 2020-10-23 15:16:11.390462130 +0800
@@ -51,6 +51,9 @@
   I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
 
 // Alias.
+#define RAWCopy RGB24Copy
+#define RAWToRAW RGB24Copy
+#define RGB24ToRAW RAWToRGB24
 #define ARGBToARGB ARGBCopy
 
 // Copy ARGB to ARGB.
@@ -62,6 +65,16 @@
              int width,
              int height);
 
+// Copy RGB24 to RGB24
+#define RGB24ToRGB24 RGB24Copy
+LIBYUV_API
+int RGB24Copy(const uint8_t* src_rgb24,
+              int src_stride_rgb24,
+              uint8_t* dst_rgb24,
+              int dst_stride_rgb24,
+              int width,
+              int height);
+
 // Convert I420 to ARGB.
 LIBYUV_API
 int I420ToARGB(const uint8_t* src_y,
diff -rNua libyuv-o/include/libyuv/convert.h libyuv/include/libyuv/convert.h
--- libyuv-o/include/libyuv/convert.h 2020-10-23 15:16:00.638324198 +0800
+++ libyuv/include/libyuv/convert.h 2020-10-23 15:16:11.390462130 +0800
@@ -122,6 +122,22 @@
              int width,
              int height);
 
+// Copy NV12 to NV12.
+#define NV21Copy NV12Copy
+#define NV12ToNV12 NV12Copy
+#define NV21ToNV21 NV12Copy
+LIBYUV_API
+int NV12Copy(const uint8_t* src_y,
+             int src_stride_y,
+             const uint8_t* src_uv,
+             int src_stride_uv,
+             uint8_t* dst_y,
+             int dst_stride_y,
+             uint8_t* dst_uv,
+             int dst_stride_uv,
+             int width,
+             int height);
+
 // Copy I010 to I010
 #define I010ToI010 I010Copy
 #define H010ToH010 I010Copy
@@ -346,6 +362,28 @@
                 int width,
                 int height);
 
+// RGB little endian (bgr in memory) to NV12.
+LIBYUV_API
+int RGB24ToNV12(const uint8_t* src_rgb24,
+                int src_stride_rgb24,
+                uint8_t* dst_y,
+                int dst_stride_y,
+                uint8_t* dst_uv,
+                int dst_stride_uv,
+                int width,
+                int height);
+
+// RGB little endian (bgr in memory) to NV21.
+LIBYUV_API
+int RGB24ToNV21(const uint8_t* src_rgb24,
+                int src_stride_rgb24,
+                uint8_t* dst_y,
+                int dst_stride_y,
+                uint8_t* dst_vu,
+                int dst_stride_vu,
+                int width,
+                int height);
+
 // RGB little endian (bgr in memory) to J420.
 LIBYUV_API
 int RGB24ToJ420(const uint8_t* src_rgb24,
@@ -372,6 +410,28 @@
               int width,
               int height);
 
+// RGB big endian (rgb in memory) to NV12.
+LIBYUV_API
+int RAWToNV12(const uint8_t* src_raw,
+              int src_stride_raw,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_uv,
+              int dst_stride_uv,
+              int width,
+              int height);
+
+// RGB big endian (rgb in memory) to NV21.
+LIBYUV_API
+int RAWToNV21(const uint8_t* src_raw,
+              int src_stride_raw,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_vu,
+              int dst_stride_vu,
+              int width,
+              int height);
+
 // RGB16 (RGBP fourcc) little endian to I420.
 LIBYUV_API
 int RGB565ToI420(const uint8_t* src_rgb565,
diff -rNua libyuv-o/source/convert_argb.cc libyuv/source/convert_argb.cc
--- libyuv-o/source/convert_argb.cc 2020-10-23 15:16:00.642324251 +0800
+++ libyuv/source/convert_argb.cc 2020-10-23 15:16:11.390462130 +0800
@@ -47,6 +47,29 @@
   return 0;
 }
 
+// Copy RGB24 with optional flipping
+LIBYUV_API
+int RGB24Copy(const uint8_t* src_rgb24,
+              int src_stride_rgb24,
+              uint8_t* dst_rgb24,
+              int dst_stride_rgb24,
+              int width,
+              int height) {
+  if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+    src_stride_rgb24 = -src_stride_rgb24;
+  }
+
+  CopyPlane(src_rgb24, src_stride_rgb24, dst_rgb24, dst_stride_rgb24,
+            width * 3, height);
+  return 0;
+}
+
 // Convert I420 to ARGB with matrix.
 LIBYUV_API
 int I420ToARGBMatrix(const uint8_t* src_y,
diff -rNua libyuv-o/source/convert.cc libyuv/source/convert.cc
--- libyuv-o/source/convert.cc 2020-10-23 15:16:00.642324251 +0800
+++ libyuv/source/convert.cc 2020-10-23 15:16:11.390462130 +0800
@@ -106,6 +106,43 @@
   return 0;
 }
 
+// Copy NV12 with optional flipping.
+// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
+// it does row coalescing.
+LIBYUV_API
+int NV12Copy(const uint8_t* src_y,
+             int src_stride_y,
+             const uint8_t* src_uv,
+             int src_stride_uv,
+             uint8_t* dst_y,
+             int dst_stride_y,
+             uint8_t* dst_uv,
+             int dst_stride_uv,
+             int width,
+             int height) {
+  int halfheight = (height + 1) >> 1;
+  if (!src_uv || !dst_uv || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
+    src_stride_y = -src_stride_y;
+    src_stride_uv = -src_stride_uv;
+  }
+
+  if (dst_y) {
+    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+  // Copy the interleaved UV plane; its rows are (width + 1) & ~1 bytes wide.
+  CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, (width + 1) & ~1,
+            halfheight);
+  return 0;
+}
+
 // Copy I010 with optional flipping.
 LIBYUV_API
 int I010Copy(const uint16_t* src_y,
@@ -1477,6 +1514,408 @@
   return 0;
 }
 
+// Convert RGB24 to NV12.
+LIBYUV_API
+int RGB24ToNV12(const uint8_t* src_rgb24,
+                int src_stride_rgb24,
+                uint8_t* dst_y,
+                int dst_stride_y,
+                uint8_t* dst_uv,
+                int dst_stride_uv,
+                int width,
+                int height) {
+  int y;
+  int halfwidth = (width + 1) >> 1;
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+     defined(HAS_RGB24TOYROW_MMI))
+  void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
+                       uint8_t* dst_u, uint8_t* dst_v, int width) =
+      RGB24ToUVRow_C;
+  void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
+      RGB24ToYRow_C;
+#else
+  void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+      RGB24ToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+                      uint8_t* dst_u, uint8_t* dst_v, int width) =
+      ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+      ARGBToYRow_C;
+#endif
+  void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+                      uint8_t* dst_uv, int width) = MergeUVRow_C;
+  if (!src_rgb24 || !dst_y || !dst_uv || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+    src_stride_rgb24 = -src_stride_rgb24;
+  }
+
+// Neon version does direct RGB24 to YUV.
+#if defined(HAS_RGB24TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
+    RGB24ToYRow = RGB24ToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGB24ToYRow = RGB24ToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RGB24ToUVRow = RGB24ToUVRow_NEON;
+      }
+    }
+  }
+// MMI and MSA versions do direct RGB24 to YUV.
+#elif (defined(HAS_RGB24TOYROW_MMI) || defined(HAS_RGB24TOYROW_MSA))
+#if defined(HAS_RGB24TOYROW_MMI) && defined(HAS_RGB24TOUVROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    RGB24ToUVRow = RGB24ToUVRow_Any_MMI;
+    RGB24ToYRow = RGB24ToYRow_Any_MMI;
+    if (IS_ALIGNED(width, 8)) {
+      RGB24ToYRow = RGB24ToYRow_MMI;
+      if (IS_ALIGNED(width, 16)) {
+        RGB24ToUVRow = RGB24ToUVRow_MMI;
+      }
+    }
+  }
+#endif
+#if defined(HAS_RGB24TOYROW_MSA) && defined(HAS_RGB24TOUVROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    RGB24ToUVRow = RGB24ToUVRow_Any_MSA;
+    RGB24ToYRow = RGB24ToYRow_Any_MSA;
+    if (IS_ALIGNED(width, 16)) {
+      RGB24ToYRow = RGB24ToYRow_MSA;
+      RGB24ToUVRow = RGB24ToUVRow_MSA;
+    }
+  }
+#endif
+// Other platforms do intermediate conversion from RGB24 to ARGB.
+#else
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#endif
+#if defined(HAS_MERGEUVROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
+    if (IS_ALIGNED(halfwidth, 32)) {
+      MergeUVRow_ = MergeUVRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    MergeUVRow_ = MergeUVRow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    MergeUVRow_ = MergeUVRow_Any_MMI;
+    if (IS_ALIGNED(halfwidth, 8)) {
+      MergeUVRow_ = MergeUVRow_MMI;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    MergeUVRow_ = MergeUVRow_Any_MSA;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_MSA;
+    }
+  }
+#endif
+
+  {
+    // Allocate a row of u and a row of v.
+    align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+    uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+
+#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+      defined(HAS_RGB24TOYROW_MMI))
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 31) & ~31;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+
+    for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+     defined(HAS_RGB24TOYROW_MMI))
+      RGB24ToUVRow(src_rgb24, src_stride_rgb24, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      RGB24ToYRow(src_rgb24, dst_y, width);
+      RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
+#else
+      RGB24ToARGBRow(src_rgb24, row, width);
+      RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
+      ARGBToUVRow(row, kRowSize, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_rgb24 += src_stride_rgb24 * 2;
+      dst_y += dst_stride_y * 2;
+      dst_uv += dst_stride_uv;
+    }
+    if (height & 1) {
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+     defined(HAS_RGB24TOYROW_MMI))
+      RGB24ToUVRow(src_rgb24, 0, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      RGB24ToYRow(src_rgb24, dst_y, width);
+#else
+      RGB24ToARGBRow(src_rgb24, row, width);
+      ARGBToUVRow(row, 0, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      ARGBToYRow(row, dst_y, width);
+#endif
+    }
+#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+      defined(HAS_RGB24TOYROW_MMI))
+    free_aligned_buffer_64(row);
+#endif
+    free_aligned_buffer_64(row_u);
+  }
+  return 0;
+}
+
+// Convert RGB24 to NV21.
+LIBYUV_API
+int RGB24ToNV21(const uint8_t* src_rgb24,
+                int src_stride_rgb24,
+                uint8_t* dst_y,
+                int dst_stride_y,
+                uint8_t* dst_vu,
+                int dst_stride_vu,
+                int width,
+                int height) {
+  int y;
+  int halfwidth = (width + 1) >> 1;
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+     defined(HAS_RGB24TOYROW_MMI))
+  void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
+                       uint8_t* dst_u, uint8_t* dst_v, int width) =
+      RGB24ToUVRow_C;
+  void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
+      RGB24ToYRow_C;
+#else
+  void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+      RGB24ToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+                      uint8_t* dst_u, uint8_t* dst_v, int width) =
+      ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+      ARGBToYRow_C;
+#endif
+  void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+                      uint8_t* dst_uv, int width) = MergeUVRow_C;
+  if (!src_rgb24 || !dst_y || !dst_vu || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+    src_stride_rgb24 = -src_stride_rgb24;
+  }
+
+// Neon version does direct RGB24 to YUV.
+#if defined(HAS_RGB24TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
+    RGB24ToYRow = RGB24ToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGB24ToYRow = RGB24ToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RGB24ToUVRow = RGB24ToUVRow_NEON;
+      }
+    }
+  }
+// MMI and MSA versions do direct RGB24 to YUV.
+#elif (defined(HAS_RGB24TOYROW_MMI) || defined(HAS_RGB24TOYROW_MSA))
+#if defined(HAS_RGB24TOYROW_MMI) && defined(HAS_RGB24TOUVROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    RGB24ToUVRow = RGB24ToUVRow_Any_MMI;
+    RGB24ToYRow = RGB24ToYRow_Any_MMI;
+    if (IS_ALIGNED(width, 8)) {
+      RGB24ToYRow = RGB24ToYRow_MMI;
+      if (IS_ALIGNED(width, 16)) {
+        RGB24ToUVRow = RGB24ToUVRow_MMI;
+      }
+    }
+  }
+#endif
+#if defined(HAS_RGB24TOYROW_MSA) && defined(HAS_RGB24TOUVROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    RGB24ToUVRow = RGB24ToUVRow_Any_MSA;
+    RGB24ToYRow = RGB24ToYRow_Any_MSA;
+    if (IS_ALIGNED(width, 16)) {
+      RGB24ToYRow = RGB24ToYRow_MSA;
+      RGB24ToUVRow = RGB24ToUVRow_MSA;
+    }
+  }
+#endif
+// Other platforms do intermediate conversion from RGB24 to ARGB.
+#else
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#endif
+#if defined(HAS_MERGEUVROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
+    if (IS_ALIGNED(halfwidth, 32)) {
+      MergeUVRow_ = MergeUVRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    MergeUVRow_ = MergeUVRow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    MergeUVRow_ = MergeUVRow_Any_MMI;
+    if (IS_ALIGNED(halfwidth, 8)) {
+      MergeUVRow_ = MergeUVRow_MMI;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    MergeUVRow_ = MergeUVRow_Any_MSA;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_MSA;
+    }
+  }
+#endif
+
+  {
+    // Allocate a row of u and a row of v.
+    align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+    uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+
+#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+      defined(HAS_RGB24TOYROW_MMI))
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 31) & ~31;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+
+    for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+     defined(HAS_RGB24TOYROW_MMI))
+      RGB24ToUVRow(src_rgb24, src_stride_rgb24, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+      RGB24ToYRow(src_rgb24, dst_y, width);
+      RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
+#else
+      RGB24ToARGBRow(src_rgb24, row, width);
+      RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
+      ARGBToUVRow(row, kRowSize, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_rgb24 += src_stride_rgb24 * 2;
+      dst_y += dst_stride_y * 2;
+      dst_vu += dst_stride_vu;
+    }
+    if (height & 1) {
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+     defined(HAS_RGB24TOYROW_MMI))
+      RGB24ToUVRow(src_rgb24, 0, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+      RGB24ToYRow(src_rgb24, dst_y, width);
+#else
+      RGB24ToARGBRow(src_rgb24, row, width);
+      ARGBToUVRow(row, 0, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+      ARGBToYRow(row, dst_y, width);
+#endif
+    }
+#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
+      defined(HAS_RGB24TOYROW_MMI))
+    free_aligned_buffer_64(row);
+#endif
+    free_aligned_buffer_64(row_u);
+  }
+  return 0;
+}
+
 // TODO(fbarchard): Use Matrix version to implement I420 and J420.
 // Convert RGB24 to J420.
 LIBYUV_API
@@ -1779,6 +2218,406 @@
   }
   return 0;
 }
+
+// Convert RAW to NV12.
+LIBYUV_API
+int RAWToNV12(const uint8_t* src_raw,
+              int src_stride_raw,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_uv,
+              int dst_stride_uv,
+              int width,
+              int height) {
+  int y;
+  int halfwidth = (width + 1) >> 1;
+#if (defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)) || \
+    defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI)
+  void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
+                     uint8_t* dst_v, int width) = RAWToUVRow_C;
+  void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
+      RAWToYRow_C;
+#else
+  void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+      RAWToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+                      uint8_t* dst_u, uint8_t* dst_v, int width) =
+      ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+      ARGBToYRow_C;
+#endif
+  void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+                      uint8_t* dst_uv, int width) = MergeUVRow_C;
+  if (!src_raw || !dst_y || !dst_uv || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_raw = src_raw + (height - 1) * src_stride_raw;
+    src_stride_raw = -src_stride_raw;
+  }
+
+// Neon version does direct RAW to YUV.
+#if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RAWToUVRow = RAWToUVRow_Any_NEON;
+    RAWToYRow = RAWToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToYRow = RAWToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RAWToUVRow = RAWToUVRow_NEON;
+      }
+    }
+  }
+// MMI and MSA versions do direct RAW to YUV.
+#elif (defined(HAS_RAWTOYROW_MMI) || defined(HAS_RAWTOYROW_MSA))
+#if defined(HAS_RAWTOYROW_MMI) && defined(HAS_RAWTOUVROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    RAWToUVRow = RAWToUVRow_Any_MMI;
+    RAWToYRow = RAWToYRow_Any_MMI;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToYRow = RAWToYRow_MMI;
+      if (IS_ALIGNED(width, 16)) {
+        RAWToUVRow = RAWToUVRow_MMI;
+      }
+    }
+  }
+#endif
+#if defined(HAS_RAWTOYROW_MSA) && defined(HAS_RAWTOUVROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    RAWToUVRow = RAWToUVRow_Any_MSA;
+    RAWToYRow = RAWToYRow_Any_MSA;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToYRow = RAWToYRow_MSA;
+      RAWToUVRow = RAWToUVRow_MSA;
+    }
+  }
+#endif
+// Other platforms do intermediate conversion from RAW to ARGB.
+#else
+#if defined(HAS_RAWTOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToARGBRow = RAWToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#endif
+#if defined(HAS_MERGEUVROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
+    if (IS_ALIGNED(halfwidth, 32)) {
+      MergeUVRow_ = MergeUVRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    MergeUVRow_ = MergeUVRow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    MergeUVRow_ = MergeUVRow_Any_MMI;
+    if (IS_ALIGNED(halfwidth, 8)) {
+      MergeUVRow_ = MergeUVRow_MMI;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    MergeUVRow_ = MergeUVRow_Any_MSA;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_MSA;
+    }
+  }
+#endif
+
+  {
+    // Allocate a row of u and a row of v.
+    align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+    uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+
+#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+      defined(HAS_RAWTOYROW_MMI))
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 31) & ~31;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+
+    for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+     defined(HAS_RAWTOYROW_MMI))
+      RAWToUVRow(src_raw, src_stride_raw, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      RAWToYRow(src_raw, dst_y, width);
+      RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
+#else
+      RAWToARGBRow(src_raw, row, width);
+      RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
+      ARGBToUVRow(row, kRowSize, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_raw += src_stride_raw * 2;
+      dst_y += dst_stride_y * 2;
+      dst_uv += dst_stride_uv;
+    }
+    if (height & 1) {
+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+     defined(HAS_RAWTOYROW_MMI))
+      RAWToUVRow(src_raw, 0, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      RAWToYRow(src_raw, dst_y, width);
+#else
+      RAWToARGBRow(src_raw, row, width);
+      ARGBToUVRow(row, 0, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      ARGBToYRow(row, dst_y, width);
+#endif
+    }
+#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+      defined(HAS_RAWTOYROW_MMI))
+    free_aligned_buffer_64(row);
+#endif
+    free_aligned_buffer_64(row_u);
+  }
+  return 0;
+}
+
+// Convert RAW to NV21.
+LIBYUV_API
+int RAWToNV21(const uint8_t* src_raw,
+              int src_stride_raw,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_vu,
+              int dst_stride_vu,
+              int width,
+              int height) {
+  int y;
+  int halfwidth = (width + 1) >> 1;
+#if (defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)) || \
+    defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI)
+  void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
+                     uint8_t* dst_v, int width) = RAWToUVRow_C;
+  void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
+      RAWToYRow_C;
+#else
+  void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+      RAWToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+                      uint8_t* dst_u, uint8_t* dst_v, int width) =
+      ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+      ARGBToYRow_C;
+#endif
+  void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+                      uint8_t* dst_uv, int width) = MergeUVRow_C;
+  if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_raw = src_raw + (height - 1) * src_stride_raw;
+    src_stride_raw = -src_stride_raw;
+  }
+
+// Neon version does direct RAW to YUV.
+#if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RAWToUVRow = RAWToUVRow_Any_NEON;
+    RAWToYRow = RAWToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToYRow = RAWToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RAWToUVRow = RAWToUVRow_NEON;
+      }
+    }
+  }
+// MMI and MSA versions do direct RAW to YUV.
+#elif (defined(HAS_RAWTOYROW_MMI) || defined(HAS_RAWTOYROW_MSA))
+#if defined(HAS_RAWTOYROW_MMI) && defined(HAS_RAWTOUVROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    RAWToUVRow = RAWToUVRow_Any_MMI;
+    RAWToYRow = RAWToYRow_Any_MMI;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToYRow = RAWToYRow_MMI;
+      if (IS_ALIGNED(width, 16)) {
+        RAWToUVRow = RAWToUVRow_MMI;
+      }
+    }
+  }
+#endif
+#if defined(HAS_RAWTOYROW_MSA) && defined(HAS_RAWTOUVROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    RAWToUVRow = RAWToUVRow_Any_MSA;
+    RAWToYRow = RAWToYRow_Any_MSA;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToYRow = RAWToYRow_MSA;
+      RAWToUVRow = RAWToUVRow_MSA;
+    }
+  }
+#endif
+// Other platforms do intermediate conversion from RAW to ARGB.
+#else
+#if defined(HAS_RAWTOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToARGBRow = RAWToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#endif
+#if defined(HAS_MERGEUVROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
+    if (IS_ALIGNED(halfwidth, 32)) {
+      MergeUVRow_ = MergeUVRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    MergeUVRow_ = MergeUVRow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    MergeUVRow_ = MergeUVRow_Any_MMI;
+    if (IS_ALIGNED(halfwidth, 8)) {
+      MergeUVRow_ = MergeUVRow_MMI;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    MergeUVRow_ = MergeUVRow_Any_MSA;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_MSA;
+    }
+  }
+#endif
+
+  {
+    // Allocate a row of u and a row of v.
+    align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+    uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+
+#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+      defined(HAS_RAWTOYROW_MMI))
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 31) & ~31;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+
+    for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+     defined(HAS_RAWTOYROW_MMI))
+      RAWToUVRow(src_raw, src_stride_raw, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+      RAWToYRow(src_raw, dst_y, width);
+      RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
+#else
+      RAWToARGBRow(src_raw, row, width);
+      RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
+      ARGBToUVRow(row, kRowSize, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_raw += src_stride_raw * 2;
+      dst_y += dst_stride_y * 2;
+      dst_vu += dst_stride_vu;
+    }
+    if (height & 1) {
+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+     defined(HAS_RAWTOYROW_MMI))
+      RAWToUVRow(src_raw, 0, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+      RAWToYRow(src_raw, dst_y, width);
+#else
+      RAWToARGBRow(src_raw, row, width);
+      ARGBToUVRow(row, 0, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+      ARGBToYRow(row, dst_y, width);
+#endif
+    }
+#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
+      defined(HAS_RAWTOYROW_MMI))
+    free_aligned_buffer_64(row);
+#endif
+    free_aligned_buffer_64(row_u);
+  }
+  return 0;
+}
 
 // Convert RGB565 to I420.
 LIBYUV_API
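
Usage note (not part of the patch): a minimal sketch of how the new entry points might be called once the patch is applied and libyuv is rebuilt. The frame size, strides, and zero-filled source buffer below are illustrative assumptions, not taken from the patch.

// Sketch only: exercises RGB24ToNV12 and NV12Copy as declared above.
#include <cstdint>
#include <vector>

#include "libyuv/convert.h"

int main() {
  const int width = 640;  // illustrative frame size
  const int height = 480;
  const int halfheight = (height + 1) / 2;

  // RGB24 packs 3 bytes per pixel; NV12 is a width x height Y plane plus a
  // half-height interleaved UV plane.
  std::vector<uint8_t> rgb24(static_cast<size_t>(width) * 3 * height, 0);
  std::vector<uint8_t> y(static_cast<size_t>(width) * height);
  std::vector<uint8_t> uv(static_cast<size_t>(width) * halfheight);
  std::vector<uint8_t> y_flipped(y.size());
  std::vector<uint8_t> uv_flipped(uv.size());

  // BGR-in-memory RGB24 -> NV12.
  if (libyuv::RGB24ToNV12(rgb24.data(), width * 3,
                          y.data(), width,
                          uv.data(), width,
                          width, height) != 0) {
    return 1;
  }

  // NV12 -> NV12 copy; passing a negative height inverts the image.
  return libyuv::NV12Copy(y.data(), width, uv.data(), width,
                          y_flipped.data(), width, uv_flipped.data(), width,
                          width, -height);
}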