row_mmi.cc 467 KB


  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/row.h"
  11. #include <string.h> // For memcpy and memset.
  12. #include "libyuv/basic_types.h"
  13. #ifdef __cplusplus
  14. namespace libyuv {
  15. extern "C" {
  16. #endif
  17. // This module is for Mips MMI.
  18. #if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
  19. // clang-format off
  20. void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24,
  21. uint8_t* dst_argb,
  22. int width) {
  23. uint64_t src0, src1, dest;
  24. const uint64_t mask = 0xff000000ULL;
  25. __asm__ volatile(
  26. "1: \n\t"
  27. "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
  28. "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
  29. "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
  30. "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
  31. "or %[src0], %[src0], %[mask] \n\t"
  32. "or %[src1], %[src1], %[mask] \n\t"
  33. "punpcklwd %[dest], %[src0], %[src1] \n\t"
  34. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  35. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  36. "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
  37. "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
  38. "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
  39. "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
  40. "or %[src0], %[src0], %[mask] \n\t"
  41. "or %[src1], %[src1], %[mask] \n\t"
  42. "punpcklwd %[dest], %[src0], %[src1] \n\t"
  43. "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
  44. "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
  45. "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
  46. "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
  47. "daddi %[width], %[width], -0x04 \n\t"
  48. "bnez %[width], 1b \n\t"
  49. : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
  50. : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), [width] "r"(width),
  51. [mask] "f"(mask)
  52. : "memory");
  53. }
  54. void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  55. uint64_t src0, src1, dest;
  56. const uint64_t mask0 = 0x0;
  57. const uint64_t mask1 = 0xff000000ULL;
  58. const uint64_t mask2 = 0xc6;
  59. __asm__ volatile(
  60. "1: \n\t"
  61. "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
  62. "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
  63. "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
  64. "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
  65. "or %[src0], %[src0], %[mask1] \n\t"
  66. "punpcklbh %[src0], %[src0], %[mask0] \n\t"
  67. "pshufh %[src0], %[src0], %[mask2] \n\t"
  68. "or %[src1], %[src1], %[mask1] \n\t"
  69. "punpcklbh %[src1], %[src1], %[mask0] \n\t"
  70. "pshufh %[src1], %[src1], %[mask2] \n\t"
  71. "packushb %[dest], %[src0], %[src1] \n\t"
  72. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  73. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  74. "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
  75. "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
  76. "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
  77. "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
  78. "or %[src0], %[src0], %[mask1] \n\t"
  79. "punpcklbh %[src0], %[src0], %[mask0] \n\t"
  80. "pshufh %[src0], %[src0], %[mask2] \n\t"
  81. "or %[src1], %[src1], %[mask1] \n\t"
  82. "punpcklbh %[src1], %[src1], %[mask0] \n\t"
  83. "pshufh %[src1], %[src1], %[mask2] \n\t"
  84. "packushb %[dest], %[src0], %[src1] \n\t"
  85. "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
  86. "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
  87. "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
  88. "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
  89. "daddi %[width], %[width], -0x04 \n\t"
  90. "bnez %[width], 1b \n\t"
  91. : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
  92. : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
  93. [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width)
  94. : "memory");
  95. }
  96. void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  97. uint64_t src0, src1;
  98. uint64_t ftmp[4];
  99. uint64_t mask0 = 0xc6;
  100. uint64_t mask1 = 0x6c;
  101. __asm__ volatile(
  102. "1: \n\t"
  103. "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t"
  104. "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t"
  105. "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t"
  106. "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t"
  107. "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
  108. "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
  109. "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
  110. "punpcklbh %[src1], %[src1], %[zero] \n\t"
  111. "pextrh %[ftmp2], %[ftmp0], %[three] \n\t"
  112. "pextrh %[ftmp3], %[ftmp1], %[one] \n\t"
  113. "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
  114. "pextrh %[ftmp3], %[ftmp1], %[two] \n\t"
  115. "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
  116. "pshufh %[src1], %[src1], %[mask1] \n\t"
  117. "pextrh %[ftmp2], %[src1], %[zero] \n\t"
  118. "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
  119. "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t"
  120. "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
  121. "packushb %[src1], %[src1], %[zero] \n\t"
  122. "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t"
  123. "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t"
  124. "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t"
  125. "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t"
  126. "daddiu %[src_raw], %[src_raw], 0x0c \n\t"
  127. "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t"
  128. "daddiu %[width], %[width], -0x04 \n\t"
  129. "bgtz %[width], 1b \n\t"
  130. : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
  131. [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3])
  132. : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width),
  133. [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
  134. [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03)
  135. : "memory");
  136. }
  137. void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565,
  138. uint8_t* dst_argb,
  139. int width) {
  140. uint64_t ftmp[5];
  141. uint64_t c0 = 0x001f001f001f001f;
  142. uint64_t c1 = 0x00ff00ff00ff00ff;
  143. uint64_t c2 = 0x0007000700070007;
  144. __asm__ volatile(
  145. "1: \n\t"
  146. "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
  147. "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
  148. "psrlh %[src1], %[src0], %[eight] \n\t"
  149. "and %[b], %[src0], %[c0] \n\t"
  150. "and %[src0], %[src0], %[c1] \n\t"
  151. "psrlh %[src0], %[src0], %[five] \n\t"
  152. "and %[g], %[src1], %[c2] \n\t"
  153. "psllh %[g], %[g], %[three] \n\t"
  154. "or %[g], %[src0], %[g] \n\t"
  155. "psrlh %[r], %[src1], %[three] \n\t"
  156. "psllh %[src0], %[b], %[three] \n\t"
  157. "psrlh %[src1], %[b], %[two] \n\t"
  158. "or %[b], %[src0], %[src1] \n\t"
  159. "psllh %[src0], %[g], %[two] \n\t"
  160. "psrlh %[src1], %[g], %[four] \n\t"
  161. "or %[g], %[src0], %[src1] \n\t"
  162. "psllh %[src0], %[r], %[three] \n\t"
  163. "psrlh %[src1], %[r], %[two] \n\t"
  164. "or %[r], %[src0], %[src1] \n\t"
  165. "packushb %[b], %[b], %[r] \n\t"
  166. "packushb %[g], %[g], %[c1] \n\t"
  167. "punpcklbh %[src0], %[b], %[g] \n\t"
  168. "punpckhbh %[src1], %[b], %[g] \n\t"
  169. "punpcklhw %[r], %[src0], %[src1] \n\t"
  170. "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
  171. "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
  172. "punpckhhw %[r], %[src0], %[src1] \n\t"
  173. "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
  174. "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
  175. "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t"
  176. "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
  177. "daddiu %[width], %[width], -0x04 \n\t"
  178. "bgtz %[width], 1b \n\t"
  179. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
  180. [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4])
  181. : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb),
  182. [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
  183. [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02),
  184. [four] "f"(0x04)
  185. : "memory");
  186. }
  187. void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555,
  188. uint8_t* dst_argb,
  189. int width) {
  190. uint64_t ftmp[6];
  191. uint64_t c0 = 0x001f001f001f001f;
  192. uint64_t c1 = 0x00ff00ff00ff00ff;
  193. uint64_t c2 = 0x0003000300030003;
  194. uint64_t c3 = 0x007c007c007c007c;
  195. uint64_t c4 = 0x0001000100010001;
  196. __asm__ volatile(
  197. "1: \n\t"
  198. "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
  199. "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
  200. "psrlh %[src1], %[src0], %[eight] \n\t"
  201. "and %[b], %[src0], %[c0] \n\t"
  202. "and %[src0], %[src0], %[c1] \n\t"
  203. "psrlh %[src0], %[src0], %[five] \n\t"
  204. "and %[g], %[src1], %[c2] \n\t"
  205. "psllh %[g], %[g], %[three] \n\t"
  206. "or %[g], %[src0], %[g] \n\t"
  207. "and %[r], %[src1], %[c3] \n\t"
  208. "psrlh %[r], %[r], %[two] \n\t"
  209. "psrlh %[a], %[src1], %[seven] \n\t"
  210. "psllh %[src0], %[b], %[three] \n\t"
  211. "psrlh %[src1], %[b], %[two] \n\t"
  212. "or %[b], %[src0], %[src1] \n\t"
  213. "psllh %[src0], %[g], %[three] \n\t"
  214. "psrlh %[src1], %[g], %[two] \n\t"
  215. "or %[g], %[src0], %[src1] \n\t"
  216. "psllh %[src0], %[r], %[three] \n\t"
  217. "psrlh %[src1], %[r], %[two] \n\t"
  218. "or %[r], %[src0], %[src1] \n\t"
  219. "xor %[a], %[a], %[c1] \n\t"
  220. "paddb %[a], %[a], %[c4] \n\t"
  221. "packushb %[b], %[b], %[r] \n\t"
  222. "packushb %[g], %[g], %[a] \n\t"
  223. "punpcklbh %[src0], %[b], %[g] \n\t"
  224. "punpckhbh %[src1], %[b], %[g] \n\t"
  225. "punpcklhw %[r], %[src0], %[src1] \n\t"
  226. "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
  227. "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
  228. "punpckhhw %[r], %[src0], %[src1] \n\t"
  229. "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
  230. "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
  231. "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t"
  232. "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
  233. "daddiu %[width], %[width], -0x04 \n\t"
  234. "bgtz %[width], 1b \n\t"
  235. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
  236. [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
  237. : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb),
  238. [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
  239. [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05),
  240. [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
  241. : "memory");
  242. }
  243. void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444,
  244. uint8_t* dst_argb,
  245. int width) {
  246. uint64_t ftmp[6];
  247. uint64_t c0 = 0x000f000f000f000f;
  248. uint64_t c1 = 0x00ff00ff00ff00ff;
  249. __asm__ volatile(
  250. "1: \n\t"
  251. "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
  252. "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
  253. "psrlh %[src1], %[src0], %[eight] \n\t"
  254. "and %[b], %[src0], %[c0] \n\t"
  255. "and %[src0], %[src0], %[c1] \n\t"
  256. "psrlh %[g], %[src0], %[four] \n\t"
  257. "and %[r], %[src1], %[c0] \n\t"
  258. "psrlh %[a], %[src1], %[four] \n\t"
  259. "psllh %[src0], %[b], %[four] \n\t"
  260. "or %[b], %[src0], %[b] \n\t"
  261. "psllh %[src0], %[g], %[four] \n\t"
  262. "or %[g], %[src0], %[g] \n\t"
  263. "psllh %[src0], %[r], %[four] \n\t"
  264. "or %[r], %[src0], %[r] \n\t"
  265. "psllh %[src0], %[a], %[four] \n\t"
  266. "or %[a], %[src0], %[a] \n\t"
  267. "packushb %[b], %[b], %[r] \n\t"
  268. "packushb %[g], %[g], %[a] \n\t"
  269. "punpcklbh %[src0], %[b], %[g] \n\t"
  270. "punpckhbh %[src1], %[b], %[g] \n\t"
  271. "punpcklhw %[r], %[src0], %[src1] \n\t"
  272. "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
  273. "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
  274. "punpckhhw %[r], %[src0], %[src1] \n\t"
  275. "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
  276. "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
  277. "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t"
  278. "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
  279. "daddiu %[width], %[width], -0x04 \n\t"
  280. "bgtz %[width], 1b \n\t"
  281. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
  282. [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
  283. : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb),
  284. [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08),
  285. [four] "f"(0x04)
  286. : "memory");
  287. }
  288. void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  289. uint64_t src;
  290. __asm__ volatile(
  291. "1: \n\t"
  292. "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
  293. "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
  294. "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t"
  295. "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t"
  296. "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t"
  297. "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t"
  298. "gsswlc1 %[src], 0x06(%[dst_ptr]) \n\t"
  299. "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t"
  300. "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t"
  301. "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t"
  302. "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t"
  303. "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t"
  304. "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t"
  305. "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t"
  306. "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t"
  307. "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t"
  308. "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
  309. "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t"
  310. "daddi %[width], %[width], -0x04 \n\t"
  311. "bnez %[width], 1b \n\t"
  312. : [src] "=&f"(src)
  313. : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width)
  314. : "memory");
  315. }
  316. void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  317. uint64_t src0, src1;
  318. uint64_t ftmp[3];
  319. uint64_t mask0 = 0xc6;
  320. uint64_t mask1 = 0x18;
  321. __asm__ volatile(
  322. "1: \n\t"
  323. "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
  324. "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
  325. "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
  326. "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
  327. "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
  328. "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
  329. "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
  330. "punpcklbh %[ftmp2], %[src1], %[zero] \n\t"
  331. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  332. "pextrh %[src0], %[ftmp1], %[two] \n\t"
  333. "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t"
  334. "pshufh %[ftmp1], %[ftmp1], %[one] \n\t"
  335. "pextrh %[src0], %[ftmp2], %[two] \n\t"
  336. "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t"
  337. "pextrh %[src0], %[ftmp2], %[one] \n\t"
  338. "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t"
  339. "pextrh %[src0], %[ftmp2], %[zero] \n\t"
  340. "pshufh %[src1], %[src1], %[mask1] \n\t"
  341. "pinsrh_0 %[src1], %[src1], %[src0] \n\t"
  342. "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
  343. "packushb %[src1], %[src1], %[zero] \n\t"
  344. "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t"
  345. "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t"
  346. "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t"
  347. "gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t"
  348. "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
  349. "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t"
  350. "daddiu %[width], %[width], -0x04 \n\t"
  351. "bgtz %[width], 1b \n\t"
  352. : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
  353. [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2])
  354. : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
  355. [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
  356. [one] "f"(0x01), [two] "f"(0x02)
  357. : "memory");
  358. }
  359. void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  360. uint64_t src0, src1;
  361. uint64_t ftmp[3];
  362. __asm__ volatile(
  363. "1: \n\t"
  364. "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
  365. "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
  366. "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
  367. "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
  368. "punpcklbh %[b], %[src0], %[src1] \n\t"
  369. "punpckhbh %[g], %[src0], %[src1] \n\t"
  370. "punpcklbh %[src0], %[b], %[g] \n\t"
  371. "punpckhbh %[src1], %[b], %[g] \n\t"
  372. "punpcklbh %[b], %[src0], %[zero] \n\t"
  373. "punpckhbh %[g], %[src0], %[zero] \n\t"
  374. "punpcklbh %[r], %[src1], %[zero] \n\t"
  375. "psrlh %[b], %[b], %[three] \n\t"
  376. "psrlh %[g], %[g], %[two] \n\t"
  377. "psrlh %[r], %[r], %[three] \n\t"
  378. "psllh %[g], %[g], %[five] \n\t"
  379. "psllh %[r], %[r], %[eleven] \n\t"
  380. "or %[b], %[b], %[g] \n\t"
  381. "or %[b], %[b], %[r] \n\t"
  382. "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
  383. "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
  384. "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
  385. "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
  386. "daddiu %[width], %[width], -0x04 \n\t"
  387. "bgtz %[width], 1b \n\t"
  388. : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
  389. [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
  390. : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
  391. [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05),
  392. [eleven] "f"(0x0b)
  393. : "memory");
  394. }
  395. // dither4 is a row of 4 values from 4x4 dither matrix.
  396. // The 4x4 matrix contains values to increase RGB. When converting to
  397. // fewer bits (565) this provides an ordered dither.
  398. // The order in the 4x4 matrix in first byte is upper left.
  399. // The 4 values are passed as an int, then referenced as an array, so
  400. // endian will not affect order of the original matrix. But the dither4
  401. // will containing the first pixel in the lower byte for little endian
  402. // or the upper byte for big endian.
  403. void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb,
  404. uint8_t* dst_rgb,
  405. const uint32_t dither4,
  406. int width) {
  407. uint64_t src0, src1;
  408. uint64_t ftmp[3];
  409. uint64_t c0 = 0x00ff00ff00ff00ff;
  410. __asm__ volatile(
  411. "punpcklbh %[dither], %[dither], %[zero] \n\t"
  412. "1: \n\t"
  413. "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
  414. "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
  415. "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
  416. "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
  417. "punpcklbh %[b], %[src0], %[src1] \n\t"
  418. "punpckhbh %[g], %[src0], %[src1] \n\t"
  419. "punpcklbh %[src0], %[b], %[g] \n\t"
  420. "punpckhbh %[src1], %[b], %[g] \n\t"
  421. "punpcklbh %[b], %[src0], %[zero] \n\t"
  422. "punpckhbh %[g], %[src0], %[zero] \n\t"
  423. "punpcklbh %[r], %[src1], %[zero] \n\t"
  424. "paddh %[b], %[b], %[dither] \n\t"
  425. "paddh %[g], %[g], %[dither] \n\t"
  426. "paddh %[r], %[r], %[dither] \n\t"
  427. "pcmpgth %[src0], %[b], %[c0] \n\t"
  428. "or %[src0], %[src0], %[b] \n\t"
  429. "and %[b], %[src0], %[c0] \n\t"
  430. "pcmpgth %[src0], %[g], %[c0] \n\t"
  431. "or %[src0], %[src0], %[g] \n\t"
  432. "and %[g], %[src0], %[c0] \n\t"
  433. "pcmpgth %[src0], %[r], %[c0] \n\t"
  434. "or %[src0], %[src0], %[r] \n\t"
  435. "and %[r], %[src0], %[c0] \n\t"
  436. "psrlh %[b], %[b], %[three] \n\t"
  437. "psrlh %[g], %[g], %[two] \n\t"
  438. "psrlh %[r], %[r], %[three] \n\t"
  439. "psllh %[g], %[g], %[five] \n\t"
  440. "psllh %[r], %[r], %[eleven] \n\t"
  441. "or %[b], %[b], %[g] \n\t"
  442. "or %[b], %[b], %[r] \n\t"
  443. "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
  444. "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
  445. "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
  446. "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
  447. "daddiu %[width], %[width], -0x04 \n\t"
  448. "bgtz %[width], 1b \n\t"
  449. : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
  450. [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
  451. : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
  452. [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02),
  453. [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b)
  454. : "memory");
  455. }
  456. void ARGBToARGB1555Row_MMI(const uint8_t* src_argb,
  457. uint8_t* dst_rgb,
  458. int width) {
  459. uint64_t src0, src1;
  460. uint64_t ftmp[4];
  461. __asm__ volatile(
  462. "1: \n\t"
  463. "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
  464. "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
  465. "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
  466. "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
  467. "punpcklbh %[b], %[src0], %[src1] \n\t"
  468. "punpckhbh %[g], %[src0], %[src1] \n\t"
  469. "punpcklbh %[src0], %[b], %[g] \n\t"
  470. "punpckhbh %[src1], %[b], %[g] \n\t"
  471. "punpcklbh %[b], %[src0], %[zero] \n\t"
  472. "punpckhbh %[g], %[src0], %[zero] \n\t"
  473. "punpcklbh %[r], %[src1], %[zero] \n\t"
  474. "punpckhbh %[a], %[src1], %[zero] \n\t"
  475. "psrlh %[b], %[b], %[three] \n\t"
  476. "psrlh %[g], %[g], %[three] \n\t"
  477. "psrlh %[r], %[r], %[three] \n\t"
  478. "psrlh %[a], %[a], %[seven] \n\t"
  479. "psllh %[g], %[g], %[five] \n\t"
  480. "psllh %[r], %[r], %[ten] \n\t"
  481. "psllh %[a], %[a], %[fifteen] \n\t"
  482. "or %[b], %[b], %[g] \n\t"
  483. "or %[b], %[b], %[r] \n\t"
  484. "or %[b], %[b], %[a] \n\t"
  485. "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
  486. "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
  487. "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
  488. "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
  489. "daddiu %[width], %[width], -0x04 \n\t"
  490. "bgtz %[width], 1b \n\t"
  491. : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
  492. [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
  493. : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
  494. [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05),
  495. [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f)
  496. : "memory");
  497. }
  498. void ARGBToARGB4444Row_MMI(const uint8_t* src_argb,
  499. uint8_t* dst_rgb,
  500. int width) {
  501. uint64_t src0, src1;
  502. uint64_t ftmp[4];
  503. __asm__ volatile(
  504. "1: \n\t"
  505. "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
  506. "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
  507. "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
  508. "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
  509. "punpcklbh %[b], %[src0], %[src1] \n\t"
  510. "punpckhbh %[g], %[src0], %[src1] \n\t"
  511. "punpcklbh %[src0], %[b], %[g] \n\t"
  512. "punpckhbh %[src1], %[b], %[g] \n\t"
  513. "punpcklbh %[b], %[src0], %[zero] \n\t"
  514. "punpckhbh %[g], %[src0], %[zero] \n\t"
  515. "punpcklbh %[r], %[src1], %[zero] \n\t"
  516. "punpckhbh %[a], %[src1], %[zero] \n\t"
  517. "psrlh %[b], %[b], %[four] \n\t"
  518. "psrlh %[g], %[g], %[four] \n\t"
  519. "psrlh %[r], %[r], %[four] \n\t"
  520. "psrlh %[a], %[a], %[four] \n\t"
  521. "psllh %[g], %[g], %[four] \n\t"
  522. "psllh %[r], %[r], %[eight] \n\t"
  523. "psllh %[a], %[a], %[twelve] \n\t"
  524. "or %[b], %[b], %[g] \n\t"
  525. "or %[b], %[b], %[r] \n\t"
  526. "or %[b], %[b], %[a] \n\t"
  527. "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
  528. "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
  529. "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
  530. "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
  531. "daddiu %[width], %[width], -0x04 \n\t"
  532. "bgtz %[width], 1b \n\t"
  533. : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
  534. [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
  535. : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
  536. [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08),
  537. [twelve] "f"(0x0c)
  538. : "memory");
  539. }
  540. void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  541. uint64_t src, src_hi, src_lo;
  542. uint64_t dest0, dest1, dest2, dest3;
  543. const uint64_t value = 0x1080;
  544. const uint64_t mask = 0x0001004200810019;
  545. __asm__ volatile(
  546. "1: \n\t"
  547. "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
  548. "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
  549. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  550. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  551. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  552. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  553. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  554. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  555. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  556. "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
  557. "paddw %[dest0], %[dest0], %[src] \n\t"
  558. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  559. "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
  560. "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
  561. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  562. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  563. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  564. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  565. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  566. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  567. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  568. "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
  569. "paddw %[dest1], %[dest1], %[src] \n\t"
  570. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  571. "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
  572. "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
  573. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  574. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  575. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  576. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  577. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  578. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  579. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  580. "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
  581. "paddw %[dest2], %[dest2], %[src] \n\t"
  582. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  583. "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
  584. "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
  585. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  586. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  587. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  588. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  589. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  590. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  591. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  592. "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
  593. "paddw %[dest3], %[dest3], %[src] \n\t"
  594. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  595. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  596. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  597. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  598. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  599. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  600. "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
  601. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  602. "daddi %[width], %[width], -0x08 \n\t"
  603. "bnez %[width], 1b \n\t"
  604. : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
  605. [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
  606. [dest3] "=&f"(dest3)
  607. : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
  608. [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
  609. [zero] "f"(0x00)
  610. : "memory");
  611. }
  612. void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
  613. int src_stride_rgb,
  614. uint8_t* dst_u,
  615. uint8_t* dst_v,
  616. int width) {
  617. uint64_t src_rgb1;
  618. uint64_t ftmp[13];
  619. uint64_t tmp[1];
  620. const uint64_t value = 0x4040;
  621. const uint64_t mask_u = 0x0013002500380002;
  622. const uint64_t mask_v = 0x00020038002f0009;
  623. __asm__ volatile(
  624. "dli %[tmp0], 0x0001000100010001 \n\t"
  625. "dmtc1 %[tmp0], %[ftmp12] \n\t"
  626. "1: \n\t"
  627. "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
  628. "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
  629. "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
  630. "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
  631. "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
  632. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  633. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  634. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  635. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  636. "paddh %[src0], %[src0], %[src_lo] \n\t"
  637. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  638. "paddh %[src0], %[src0], %[src_hi] \n\t"
  639. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  640. "psrlh %[src0], %[src0], %[one] \n\t"
  641. "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
  642. "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
  643. "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
  644. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  645. "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
  646. "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
  647. "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
  648. "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
  649. "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
  650. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  651. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  652. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  653. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  654. "paddh %[src0], %[src0], %[src_lo] \n\t"
  655. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  656. "paddh %[src0], %[src0], %[src_hi] \n\t"
  657. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  658. "psrlh %[src0], %[src0], %[one] \n\t"
  659. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  660. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  661. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  662. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  663. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  664. "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
  665. "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
  666. "psubw %[dest0_u], %[src0], %[src1] \n\t"
  667. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  668. "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
  669. "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
  670. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  671. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  672. "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
  673. "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
  674. "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
  675. "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
  676. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  677. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  678. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  679. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  680. "paddh %[src0], %[src0], %[src_lo] \n\t"
  681. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  682. "paddh %[src0], %[src0], %[src_hi] \n\t"
  683. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  684. "psrlh %[src0], %[src0], %[one] \n\t"
  685. "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
  686. "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
  687. "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
  688. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  689. "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
  690. "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
  691. "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
  692. "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
  693. "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
  694. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  695. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  696. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  697. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  698. "paddh %[src0], %[src0], %[src_lo] \n\t"
  699. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  700. "paddh %[src0], %[src0], %[src_hi] \n\t"
  701. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  702. "psrlh %[src0], %[src0], %[one] \n\t"
  703. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  704. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  705. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  706. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  707. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  708. "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
  709. "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
  710. "psubw %[dest1_u], %[src0], %[src1] \n\t"
  711. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  712. "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
  713. "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
  714. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  715. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  716. "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
  717. "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
  718. "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
  719. "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
  720. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  721. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  722. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  723. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  724. "paddh %[src0], %[src0], %[src_lo] \n\t"
  725. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  726. "paddh %[src0], %[src0], %[src_hi] \n\t"
  727. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  728. "psrlh %[src0], %[src0], %[one] \n\t"
  729. "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
  730. "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
  731. "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
  732. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  733. "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
  734. "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
  735. "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
  736. "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
  737. "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
  738. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  739. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  740. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  741. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  742. "paddh %[src0], %[src0], %[src_lo] \n\t"
  743. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  744. "paddh %[src0], %[src0], %[src_hi] \n\t"
  745. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  746. "psrlh %[src0], %[src0], %[one] \n\t"
  747. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  748. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  749. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  750. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  751. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  752. "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
  753. "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
  754. "psubw %[dest2_u], %[src0], %[src1] \n\t"
  755. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  756. "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
  757. "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
  758. "psubw %[dest2_v], %[src1], %[src0] \n\t"
  759. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  760. "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
  761. "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
  762. "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
  763. "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
  764. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  765. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  766. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  767. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  768. "paddh %[src0], %[src0], %[src_lo] \n\t"
  769. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  770. "paddh %[src0], %[src0], %[src_hi] \n\t"
  771. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  772. "psrlh %[src0], %[src0], %[one] \n\t"
  773. "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
  774. "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
  775. "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
  776. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  777. "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
  778. "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
  779. "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
  780. "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
  781. "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
  782. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  783. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  784. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  785. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  786. "paddh %[src0], %[src0], %[src_lo] \n\t"
  787. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  788. "paddh %[src0], %[src0], %[src_hi] \n\t"
  789. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  790. "psrlh %[src0], %[src0], %[one] \n\t"
  791. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  792. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  793. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  794. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  795. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  796. "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
  797. "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
  798. "psubw %[dest3_u], %[src0], %[src1] \n\t"
  799. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  800. "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
  801. "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
  802. "psubw %[dest3_v], %[src1], %[src0] \n\t"
  803. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  804. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  805. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  806. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  807. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  808. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  809. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  810. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  811. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  812. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  813. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  814. "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
  815. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  816. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  817. "daddi %[width], %[width], -0x10 \n\t"
  818. "bgtz %[width], 1b \n\t"
  819. : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
  820. [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
  821. [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
  822. [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
  823. [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
  824. [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
  825. [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
  826. : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
  827. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  828. [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
  829. [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
  830. [sixteen] "f"(0x10)
  831. : "memory");
  832. }
  // Converts a row of 4-byte pixels to one luma byte per pixel using Loongson
  // MMI inline assembly. Each loop iteration reads 32 source bytes (8 pixels)
  // and writes 8 bytes to dst_y.
  //
  // For every pixel, halfword 0 (widened source byte 0) is overwritten with
  // `value` before the multiply-add against `mask`, so the stored result is
  //   (byte1 * 0x42 + byte2 * 0x81 + byte3 * 0x19 + 0x1080) >> 8
  // Presumably byte 0 is alpha and bytes 1..3 are R, G, B for this layout -
  // TODO confirm against the C reference row function.
  //
  // src_argb0: source pixels, 4 bytes each, read with gsldlc1/gsldrc1
  //            left/right load pairs.
  // dst_y:     destination, 8 bytes written per iteration with
  //            gssdlc1/gssdrc1 left/right store pairs.
  // width:     pixel count. NOTE(review): the body runs once before the first
  //            test and the loop exits only when the counter is exactly zero
  //            (bnez after subtracting 8), so width has to be a positive
  //            multiple of 8 - presumably guaranteed by the caller; verify.
  //
  // NOTE(review): src_argb0, dst_y and width are updated inside the asm
  // (daddiu/daddi) although they are listed as input-only "r" operands. GCC's
  // extended-asm rules expect modified operands to be declared as outputs
  // (e.g. "+r") - confirm and fix before relying on values after the asm.
  833. void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  834. uint64_t src, src_hi, src_lo;
  835. uint64_t dest0, dest1, dest2, dest3;
  // Inserted into halfword 0 of each widened pixel; multiplied by mask
  // halfword 0 (0x0001) it contributes the constant term 0x1080.
  836. const uint64_t value = 0x1080;
  // Four 16-bit multipliers, lowest halfword first: 0x0001, 0x0042, 0x0081,
  // 0x0019.
  837. const uint64_t mask = 0x0019008100420001;
  838. __asm__ volatile(
  // Loop head: one pass handles 8 pixels.
  839. "1: \n\t"
  // Pixels 0-1 (source bytes 0x00..0x07) -> two 32-bit results in dest0.
  840. "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
  841. "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
  842. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  843. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  844. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  845. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  846. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  847. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  // Add the two partial sums of each pixel, then drop the 8 fraction bits.
  848. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  849. "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
  850. "paddw %[dest0], %[dest0], %[src] \n\t"
  851. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  // Pixels 2-3 (source bytes 0x08..0x0f) -> dest1.
  852. "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
  853. "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
  854. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  855. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  856. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  857. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  858. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  859. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  860. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  861. "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
  862. "paddw %[dest1], %[dest1], %[src] \n\t"
  863. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  // Pixels 4-5 (source bytes 0x10..0x17) -> dest2.
  864. "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
  865. "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
  866. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  867. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  868. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  869. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  870. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  871. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  872. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  873. "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
  874. "paddw %[dest2], %[dest2], %[src] \n\t"
  875. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  // Pixels 6-7 (source bytes 0x18..0x1f) -> dest3.
  876. "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
  877. "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
  878. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  879. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  880. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  881. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  882. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  883. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  884. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  885. "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
  886. "paddw %[dest3], %[dest3], %[src] \n\t"
  887. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  // Pack the eight 32-bit results down to bytes (packsswh, then packushb) and
  // store them as one 8-byte group.
  888. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  889. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  890. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  891. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  892. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  // Advance by 32 source bytes / 8 output bytes and loop until width is zero.
  893. "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
  894. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  895. "daddi %[width], %[width], -0x08 \n\t"
  896. "bnez %[width], 1b \n\t"
  // Outputs: scratch FP registers only (early-clobber).
  897. : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
  898. [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
  899. [dest3] "=&f"(dest3)
  // Inputs: pointers and count in GPRs; constants passed in FP registers.
  900. : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
  901. [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
  902. [zero] "f"(0x00)
  903. : "memory");
  904. }
  // Produces one row of U and one row of V bytes from two rows of 4-byte
  // pixels using Loongson MMI inline assembly. Each loop iteration reads 64
  // bytes (16 pixels) from src_rgb0 and 64 bytes from
  // src_rgb0 + src_stride_rgb, and writes 8 bytes to dst_u and 8 bytes to
  // dst_v, i.e. one U/V pair per 2x2 block of pixels.
  //
  // For each 2x2 block the four pixels are summed per byte lane and reduced to
  // s = (sum + 1) >> 1. With sN denoting that value for source byte N of the
  // pixel, the stored results are
  //   U = (s3 * 0x38 + 0x4040 * 2 - s1 * 0x13 - s2 * 0x25) >> 8
  //   V = (s1 * 0x38 + 0x4040 * 2 - s2 * 0x2f - s3 * 0x09) >> 8
  // (arithmetic shift). Source byte 0 does not contribute. Presumably bytes
  // 1..3 are R, G, B for this layout - TODO confirm against the C reference.
  //
  // src_rgb0:       first source row.
  // src_stride_rgb: byte offset added to src_rgb0 to reach the second row.
  // dst_u, dst_v:   destinations, 8 bytes each per iteration.
  // width:          pixel count; decremented by 16 per iteration and the loop
  //                 continues while the remainder is positive (bgtz). The body
  //                 always runs at least once and always handles a full
  //                 16-pixel group.
  //
  // NOTE(review): src_rgb0, dst_u, dst_v and width are updated inside the asm
  // (daddiu/daddi) although they are listed as input-only "r" operands. GCC's
  // extended-asm rules expect modified operands to be declared as outputs
  // (e.g. "+r") - confirm and fix before relying on values after the asm.
  905. void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
  906. int src_stride_rgb,
  907. uint8_t* dst_u,
  908. uint8_t* dst_v,
  909. int width) {
  910. uint64_t src_rgb1;
  911. uint64_t ftmp[13];
  912. uint64_t tmp[1];
  // Inserted into the free halfword of each vector; it is multiplied by the
  // 0x0002 entry of mask_u / mask_v, giving the constant term 0x8080.
  913. const uint64_t value = 0x4040;
  // 16-bit multipliers, lowest halfword first: 0x13, 0x25, 0x38, 0x02.
  914. const uint64_t mask_u = 0x0002003800250013;
  // 16-bit multipliers, lowest halfword first: 0x02, 0x38, 0x2f, 0x09.
  915. const uint64_t mask_v = 0x0009002f00380002;
  916. __asm__ volatile(
  // ftmp12 = 1 in each of the four halfwords (rounding term for the >> 1).
  917. "dli %[tmp0], 0x0001000100010001 \n\t"
  918. "dmtc1 %[tmp0], %[ftmp12] \n\t"
  // Loop head: one pass handles 16 pixels from each of the two rows.
  919. "1: \n\t"
  // Address of the second source row.
  920. "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
  // Block 0 (bytes 0x00..0x07 of both rows): sum the four pixels per byte
  // lane, add 1 and shift right by one.
  921. "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
  922. "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
  923. "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
  924. "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
  925. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  926. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  927. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  928. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  929. "paddh %[src0], %[src0], %[src_lo] \n\t"
  930. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  931. "paddh %[src0], %[src0], %[src_hi] \n\t"
  932. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  933. "psrlh %[src0], %[src0], %[one] \n\t"
  // U input: shift halfwords 1..3 down to 0..2 and put `value` in halfword 3.
  // V input: keep halfwords 1..3 in place and put `value` in halfword 0.
  934. "dsrl %[dest0_u], %[src0], %[sixteen] \n\t"
  935. "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t"
  936. "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t"
  937. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  938. "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
  // Block 1 (bytes 0x08..0x0f of both rows), same steps into src_lo/src_hi.
  939. "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
  940. "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
  941. "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
  942. "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
  943. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  944. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  945. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  946. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  947. "paddh %[src0], %[src0], %[src_lo] \n\t"
  948. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  949. "paddh %[src0], %[src0], %[src_hi] \n\t"
  950. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  951. "psrlh %[src0], %[src0], %[one] \n\t"
  952. "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
  953. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  954. "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
  955. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  956. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  // Merge blocks 0 and 1: U = high words - low words, V = low words - high
  // words, each followed by an arithmetic shift right by 8.
  957. "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
  958. "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
  959. "psubw %[dest0_u], %[src1], %[src0] \n\t"
  960. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  961. "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
  962. "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
  963. "psubw %[dest0_v], %[src0], %[src1] \n\t"
  964. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  // Block 2 (bytes 0x10..0x17 of both rows) -> dest1_u / dest1_v.
  965. "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
  966. "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
  967. "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
  968. "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
  969. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  970. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  971. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  972. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  973. "paddh %[src0], %[src0], %[src_lo] \n\t"
  974. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  975. "paddh %[src0], %[src0], %[src_hi] \n\t"
  976. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  977. "psrlh %[src0], %[src0], %[one] \n\t"
  978. "dsrl %[dest1_u], %[src0], %[sixteen] \n\t"
  979. "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t"
  980. "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t"
  981. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  982. "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
  // Block 3 (bytes 0x18..0x1f of both rows) -> src_lo / src_hi.
  983. "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
  984. "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
  985. "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
  986. "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
  987. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  988. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  989. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  990. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  991. "paddh %[src0], %[src0], %[src_lo] \n\t"
  992. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  993. "paddh %[src0], %[src0], %[src_hi] \n\t"
  994. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  995. "psrlh %[src0], %[src0], %[one] \n\t"
  996. "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
  997. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  998. "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
  999. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1000. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  // Merge blocks 2 and 3 into dest1_u / dest1_v.
  1001. "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
  1002. "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
  1003. "psubw %[dest1_u], %[src1], %[src0] \n\t"
  1004. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  1005. "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
  1006. "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
  1007. "psubw %[dest1_v], %[src0], %[src1] \n\t"
  1008. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  // Block 4 (bytes 0x20..0x27 of both rows) -> dest2_u / dest2_v.
  1009. "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
  1010. "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
  1011. "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
  1012. "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
  1013. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1014. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1015. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1016. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1017. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1018. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1019. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1020. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1021. "psrlh %[src0], %[src0], %[one] \n\t"
  1022. "dsrl %[dest2_u], %[src0], %[sixteen] \n\t"
  1023. "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t"
  1024. "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t"
  1025. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  1026. "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
  // Block 5 (bytes 0x28..0x2f of both rows) -> src_lo / src_hi.
  1027. "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
  1028. "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
  1029. "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
  1030. "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
  1031. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1032. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1033. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1034. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1035. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1036. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1037. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1038. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1039. "psrlh %[src0], %[src0], %[one] \n\t"
  1040. "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
  1041. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1042. "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
  1043. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1044. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  // Merge blocks 4 and 5 into dest2_u / dest2_v.
  1045. "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
  1046. "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
  1047. "psubw %[dest2_u], %[src1], %[src0] \n\t"
  1048. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  1049. "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
  1050. "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
  1051. "psubw %[dest2_v], %[src0], %[src1] \n\t"
  1052. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  // Block 6 (bytes 0x30..0x37 of both rows) -> dest3_u / dest3_v.
  1053. "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
  1054. "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
  1055. "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
  1056. "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
  1057. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1058. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1059. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1060. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1061. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1062. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1063. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1064. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1065. "psrlh %[src0], %[src0], %[one] \n\t"
  1066. "dsrl %[dest3_u], %[src0], %[sixteen] \n\t"
  1067. "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t"
  1068. "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t"
  1069. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  1070. "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
  // Block 7 (bytes 0x38..0x3f of both rows) -> src_lo / src_hi.
  1071. "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
  1072. "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
  1073. "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
  1074. "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
  1075. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1076. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1077. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1078. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1079. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1080. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1081. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1082. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1083. "psrlh %[src0], %[src0], %[one] \n\t"
  1084. "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
  1085. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1086. "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
  1087. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1088. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  // Merge blocks 6 and 7 into dest3_u / dest3_v.
  1089. "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
  1090. "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
  1091. "psubw %[dest3_u], %[src1], %[src0] \n\t"
  1092. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  1093. "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
  1094. "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
  1095. "psubw %[dest3_v], %[src0], %[src1] \n\t"
  1096. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  // Pack the eight 32-bit U results to bytes (packsswh, then packushb) and
  // store them; then the same for V.
  1097. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  1098. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  1099. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  1100. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  1101. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  1102. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  1103. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  1104. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  1105. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  1106. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  // Advance by 64 source bytes / 8 output bytes per plane; loop while the
  // remaining width is positive.
  1107. "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
  1108. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  1109. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  1110. "daddi %[width], %[width], -0x10 \n\t"
  1111. "bgtz %[width], 1b \n\t"
  // Outputs: second-row pointer, scratch FP registers and one scratch GPR
  // (all early-clobber).
  1112. : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
  1113. [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
  1114. [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
  1115. [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
  1116. [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
  1117. [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
  1118. [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
  // Inputs: pointers, stride and count in GPRs; constants in FP registers.
  1119. : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
  1120. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  1121. [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
  1122. [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
  1123. [sixteen] "f"(0x10)
  1124. : "memory");
  1125. }
  // Converts a row of 4-byte pixels to one luma byte per pixel using Loongson
  // MMI inline assembly. Each loop iteration reads 32 source bytes (8 pixels)
  // and writes 8 bytes to dst_y.
  //
  // For every pixel, halfword 3 (widened source byte 3) is overwritten with
  // `value` before the multiply-add against `mask`, so the stored result is
  //   (byte0 * 0x42 + byte1 * 0x81 + byte2 * 0x19 + 0x1080) >> 8
  // Presumably bytes 0..2 are R, G, B and byte 3 is alpha for this layout -
  // TODO confirm against the C reference row function.
  //
  // src_argb0: source pixels, 4 bytes each, read with gsldlc1/gsldrc1
  //            left/right load pairs.
  // dst_y:     destination, 8 bytes written per iteration with
  //            gssdlc1/gssdrc1 left/right store pairs.
  // width:     pixel count. NOTE(review): the body runs once before the first
  //            test and the loop exits only when the counter is exactly zero
  //            (bnez after subtracting 8), so width has to be a positive
  //            multiple of 8 - presumably guaranteed by the caller; verify.
  //
  // NOTE(review): src_argb0, dst_y and width are updated inside the asm
  // (daddiu/daddi) although they are listed as input-only "r" operands. GCC's
  // extended-asm rules expect modified operands to be declared as outputs
  // (e.g. "+r") - confirm and fix before relying on values after the asm.
  1126. void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  1127. uint64_t src, src_hi, src_lo;
  1128. uint64_t dest0, dest1, dest2, dest3;
  // Inserted into halfword 3 of each widened pixel; multiplied by mask
  // halfword 3 (0x0001) it contributes the constant term 0x1080.
  1129. const uint64_t value = 0x1080;
  // Four 16-bit multipliers, lowest halfword first: 0x0042, 0x0081, 0x0019,
  // 0x0001.
  1130. const uint64_t mask = 0x0001001900810042;
  1131. __asm__ volatile(
  // Loop head: one pass handles 8 pixels.
  1132. "1: \n\t"
  // Pixels 0-1 (source bytes 0x00..0x07) -> two 32-bit results in dest0.
  1133. "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
  1134. "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
  1135. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1136. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1137. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1138. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1139. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1140. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  // Add the two partial sums of each pixel, then drop the 8 fraction bits.
  1141. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1142. "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
  1143. "paddw %[dest0], %[dest0], %[src] \n\t"
  1144. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  // Pixels 2-3 (source bytes 0x08..0x0f) -> dest1.
  1145. "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
  1146. "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
  1147. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1148. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1149. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1150. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1151. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1152. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1153. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1154. "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
  1155. "paddw %[dest1], %[dest1], %[src] \n\t"
  1156. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  // Pixels 4-5 (source bytes 0x10..0x17) -> dest2.
  1157. "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
  1158. "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
  1159. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1160. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1161. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1162. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1163. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1164. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1165. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1166. "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
  1167. "paddw %[dest2], %[dest2], %[src] \n\t"
  1168. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  // Pixels 6-7 (source bytes 0x18..0x1f) -> dest3.
  1169. "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
  1170. "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
  1171. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1172. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1173. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1174. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1175. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1176. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1177. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1178. "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
  1179. "paddw %[dest3], %[dest3], %[src] \n\t"
  1180. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  // Pack the eight 32-bit results down to bytes (packsswh, then packushb) and
  // store them as one 8-byte group.
  1181. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  1182. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  1183. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  1184. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  1185. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  // Advance by 32 source bytes / 8 output bytes and loop until width is zero.
  1186. "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
  1187. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  1188. "daddi %[width], %[width], -0x08 \n\t"
  1189. "bnez %[width], 1b \n\t"
  // Outputs: scratch FP registers only (early-clobber).
  1190. : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
  1191. [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
  1192. [dest3] "=&f"(dest3)
  // Inputs: pointers and count in GPRs; constants passed in FP registers.
  1193. : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
  1194. [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
  1195. [zero] "f"(0x00)
  1196. : "memory");
  1197. }
  1198. void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
  1199. int src_stride_rgb,
  1200. uint8_t* dst_u,
  1201. uint8_t* dst_v,
  1202. int width) {
  1203. uint64_t src_rgb1;
  1204. uint64_t ftmp[13];
  1205. uint64_t tmp[1];
  1206. const uint64_t value = 0x4040;
  1207. const uint64_t mask_u = 0x0002003800250013;
  1208. const uint64_t mask_v = 0x0009002F00380002;
  1209. __asm__ volatile(
  1210. "dli %[tmp0], 0x0001000100010001 \n\t"
  1211. "dmtc1 %[tmp0], %[ftmp12] \n\t"
  1212. "1: \n\t"
  1213. "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
  1214. "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
  1215. "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
  1216. "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
  1217. "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
  1218. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1219. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1220. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1221. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1222. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1223. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1224. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1225. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1226. "psrlh %[src0], %[src0], %[one] \n\t"
  1227. "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
  1228. "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
  1229. "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
  1230. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  1231. "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
  1232. "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
  1233. "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
  1234. "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
  1235. "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
  1236. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1237. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1238. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1239. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1240. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1241. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1242. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1243. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1244. "psrlh %[src0], %[src0], %[one] \n\t"
  1245. "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
  1246. "dsll %[src_hi], %[src0], %[sixteen] \n\t"
  1247. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  1248. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1249. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1250. "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
  1251. "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
  1252. "psubw %[dest0_u], %[src1], %[src0] \n\t"
  1253. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  1254. "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
  1255. "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
  1256. "psubw %[dest0_v], %[src0], %[src1] \n\t"
  1257. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  1258. "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
  1259. "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
  1260. "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
  1261. "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
  1262. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1263. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1264. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1265. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1266. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1267. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1268. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1269. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1270. "psrlh %[src0], %[src0], %[one] \n\t"
  1271. "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
  1272. "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
  1273. "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
  1274. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  1275. "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
  1276. "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
  1277. "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
  1278. "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
  1279. "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
  1280. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1281. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1282. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1283. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1284. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1285. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1286. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1287. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1288. "psrlh %[src0], %[src0], %[one] \n\t"
  1289. "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
  1290. "dsll %[src_hi], %[src0], %[sixteen] \n\t"
  1291. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  1292. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1293. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1294. "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
  1295. "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
  1296. "psubw %[dest1_u], %[src1], %[src0] \n\t"
  1297. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  1298. "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
  1299. "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
  1300. "psubw %[dest1_v], %[src0], %[src1] \n\t"
  1301. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  1302. "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
  1303. "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
  1304. "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
  1305. "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
  1306. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1307. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1308. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1309. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1310. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1311. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1312. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1313. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1314. "psrlh %[src0], %[src0], %[one] \n\t"
  1315. "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
  1316. "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
  1317. "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
  1318. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  1319. "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
  1320. "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
  1321. "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
  1322. "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
  1323. "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
  1324. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1325. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1326. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1327. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1328. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1329. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1330. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1331. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1332. "psrlh %[src0], %[src0], %[one] \n\t"
  1333. "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
  1334. "dsll %[src_hi], %[src0], %[sixteen] \n\t"
  1335. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  1336. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1337. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1338. "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
  1339. "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
  1340. "psubw %[dest2_u], %[src1], %[src0] \n\t"
  1341. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  1342. "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
  1343. "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
  1344. "psubw %[dest2_v], %[src0], %[src1] \n\t"
  1345. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  1346. "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
  1347. "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
  1348. "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
  1349. "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
  1350. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1351. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1352. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1353. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1354. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1355. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1356. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1357. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1358. "psrlh %[src0], %[src0], %[one] \n\t"
  1359. "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
  1360. "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
  1361. "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
  1362. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  1363. "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
  1364. "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
  1365. "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
  1366. "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
  1367. "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
  1368. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1369. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1370. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1371. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1372. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1373. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1374. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1375. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1376. "psrlh %[src0], %[src0], %[one] \n\t"
  1377. "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
  1378. "dsll %[src_hi], %[src0], %[sixteen] \n\t"
  1379. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  1380. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1381. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1382. "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
  1383. "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
  1384. "psubw %[dest3_u], %[src1], %[src0] \n\t"
  1385. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  1386. "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
  1387. "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
  1388. "psubw %[dest3_v], %[src0], %[src1] \n\t"
  1389. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  1390. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  1391. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  1392. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  1393. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  1394. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  1395. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  1396. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  1397. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  1398. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  1399. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  1400. "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
  1401. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  1402. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  1403. "daddi %[width], %[width], -0x10 \n\t"
  1404. "bgtz %[width], 1b \n\t"
  1405. : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
  1406. [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
  1407. [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
  1408. [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
  1409. [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
  1410. [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
  1411. [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
  1412. : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
  1413. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  1414. [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
  1415. [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
  1416. [sixteen] "f"(0x10)
  1417. : "memory");
  1418. }
  1419. void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  1420. uint64_t src, src_hi, src_lo;
  1421. uint64_t dest0, dest1, dest2, dest3;
  1422. const uint64_t value = 0x1080;
  1423. const uint64_t mask = 0x0042008100190001;
  1424. __asm__ volatile(
  1425. "1: \n\t"
  1426. "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
  1427. "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
  1428. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1429. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  1430. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1431. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1432. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  1433. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1434. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1435. "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
  1436. "paddw %[dest0], %[dest0], %[src] \n\t"
  1437. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  1438. "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
  1439. "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
  1440. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1441. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  1442. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1443. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1444. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  1445. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1446. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1447. "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
  1448. "paddw %[dest1], %[dest1], %[src] \n\t"
  1449. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  1450. "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
  1451. "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
  1452. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1453. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  1454. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1455. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1456. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  1457. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1458. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1459. "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
  1460. "paddw %[dest2], %[dest2], %[src] \n\t"
  1461. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  1462. "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
  1463. "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
  1464. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1465. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  1466. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1467. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1468. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  1469. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1470. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1471. "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
  1472. "paddw %[dest3], %[dest3], %[src] \n\t"
  1473. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  1474. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  1475. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  1476. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  1477. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  1478. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  1479. "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
  1480. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  1481. "daddi %[width], %[width], -0x08 \n\t"
  1482. "bnez %[width], 1b \n\t"
  1483. : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
  1484. [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
  1485. [dest3] "=&f"(dest3)
  1486. : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
  1487. [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
  1488. [zero] "f"(0x00)
  1489. : "memory");
  1490. }
  // Computes one row of U and V samples from two rows of 4-byte RGBA pixels
  // using Loongson MMI, combining each 2x2 pixel block into one sample.
  //
  // src_rgb0:       first input row; the second row is read from
  //                 src_rgb0 + src_stride_rgb (recomputed every iteration).
  // src_stride_rgb: byte offset from the first row to the second row.
  // dst_u, dst_v:   output rows; 8 bytes are stored to each per iteration.
  // width:          pixel count; each iteration consumes 16 pixels (0x40 bytes
  //                 per row) and the loop repeats while the remaining count is
  //                 greater than 0. The body always runs at least once.
  //
  // Per sample, the four pixels of a 2x2 block are summed per byte lane, 1 is
  // added and the sum is halved (s = (sum + 1) >> 1), then:
  //   u = (2 * 0x4040 + 56 * s1 - 37 * s2 - 19 * s3) >> 8
  //   v = (2 * 0x4040 + 56 * s3 - 47 * s2 -  9 * s1) >> 8
  // where s1..s3 are the lanes for bytes 1..3 of a pixel; byte 0 is discarded.
  // The results are narrowed with packsswh/packushb before being stored.
  //
  // NOTE(review): the asm advances %[src_rgb0], %[dst_u], %[dst_v] and
  // %[width], which are declared as input-only operands; GCC's extended-asm
  // rules say input-only operands must not be modified. Turning all four into
  // "+r" operands would count each twice toward GCC's 30-operand limit
  // (15 outputs + 8 + 8 remaining inputs = 31), so a fix also has to drop an
  // operand - confirm before changing.
  1491. void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
  1492. int src_stride_rgb,
  1493. uint8_t* dst_u,
  1494. uint8_t* dst_v,
  1495. int width) {
  1496. uint64_t src_rgb1;
  1497. uint64_t ftmp[13];
  1498. uint64_t tmp[1];
  // Bias lane; it is multiplied by the weight 2 held in mask_u / mask_v.
  1499. const uint64_t value = 0x4040;
  // 16-bit lane weights, lane 0 upward: 2, 56, 37, 19.
  1500. const uint64_t mask_u = 0x0013002500380002;
  // 16-bit lane weights, lane 0 upward: 9, 47, 56, 2.
  1501. const uint64_t mask_v = 0x00020038002f0009;
  1502. __asm__ volatile(
  // ftmp12 = 1 in every 16-bit lane: the rounding term added before ">> 1".
  1503. "dli %[tmp0], 0x0001000100010001 \n\t"
  1504. "dmtc1 %[tmp0], %[ftmp12] \n\t"
  1505. "1: \n\t"
  // Address of the second input row for this iteration.
  1506. "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
  // Sample 0 (bytes 0x00..0x07 of both rows): sum the 2x2 block, round, halve,
  // then form the partial products for U (dest0_u) and V (dest0_v).
  1507. "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
  1508. "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
  1509. "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
  1510. "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
  1511. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1512. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1513. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1514. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1515. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1516. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1517. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1518. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1519. "psrlh %[src0], %[src0], %[one] \n\t"
  // U input: lane 0 is overwritten with the bias. V input: the lanes are
  // shifted down by one (dropping lane 0) and the bias goes into lane 3.
  1520. "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t"
  1521. "dsrl %[dest0_v], %[src0], %[sixteen] \n\t"
  1522. "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t"
  1523. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  1524. "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
  // Sample 1 (bytes 0x08..0x0f): same steps, partial products in src_lo/src_hi.
  1525. "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
  1526. "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
  1527. "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
  1528. "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
  1529. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1530. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1531. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1532. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1533. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1534. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1535. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1536. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1537. "psrlh %[src0], %[src0], %[one] \n\t"
  1538. "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
  1539. "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
  1540. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1541. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1542. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  // Combine samples 0 and 1: U = low words - high words, V = high words - low
  // words, each followed by an arithmetic shift right by 8.
  1543. "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
  1544. "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
  1545. "psubw %[dest0_u], %[src0], %[src1] \n\t"
  1546. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  1547. "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
  1548. "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
  1549. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  1550. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  // Sample 2 (bytes 0x10..0x17) -> dest1_u / dest1_v partial products.
  1551. "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
  1552. "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
  1553. "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
  1554. "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
  1555. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1556. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1557. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1558. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1559. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1560. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1561. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1562. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1563. "psrlh %[src0], %[src0], %[one] \n\t"
  1564. "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t"
  1565. "dsrl %[dest1_v], %[src0], %[sixteen] \n\t"
  1566. "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t"
  1567. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  1568. "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
  // Sample 3 (bytes 0x18..0x1f), then combine samples 2 and 3.
  1569. "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
  1570. "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
  1571. "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
  1572. "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
  1573. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1574. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1575. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1576. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1577. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1578. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1579. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1580. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1581. "psrlh %[src0], %[src0], %[one] \n\t"
  1582. "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
  1583. "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
  1584. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1585. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1586. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1587. "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
  1588. "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
  1589. "psubw %[dest1_u], %[src0], %[src1] \n\t"
  1590. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  1591. "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
  1592. "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
  1593. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  1594. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  // Sample 4 (bytes 0x20..0x27) -> dest2_u / dest2_v partial products.
  1595. "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
  1596. "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
  1597. "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
  1598. "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
  1599. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1600. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1601. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1602. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1603. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1604. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1605. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1606. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1607. "psrlh %[src0], %[src0], %[one] \n\t"
  1608. "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t"
  1609. "dsrl %[dest2_v], %[src0], %[sixteen] \n\t"
  1610. "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t"
  1611. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  1612. "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
  // Sample 5 (bytes 0x28..0x2f), then combine samples 4 and 5.
  1613. "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
  1614. "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
  1615. "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
  1616. "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
  1617. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1618. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1619. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1620. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1621. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1622. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1623. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1624. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1625. "psrlh %[src0], %[src0], %[one] \n\t"
  1626. "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
  1627. "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
  1628. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1629. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1630. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1631. "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
  1632. "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
  1633. "psubw %[dest2_u], %[src0], %[src1] \n\t"
  1634. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  1635. "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
  1636. "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
  1637. "psubw %[dest2_v], %[src1], %[src0] \n\t"
  1638. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  // Sample 6 (bytes 0x30..0x37) -> dest3_u / dest3_v partial products.
  1639. "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
  1640. "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
  1641. "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
  1642. "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
  1643. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1644. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1645. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1646. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1647. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1648. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1649. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1650. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1651. "psrlh %[src0], %[src0], %[one] \n\t"
  1652. "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t"
  1653. "dsrl %[dest3_v], %[src0], %[sixteen] \n\t"
  1654. "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t"
  1655. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  1656. "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
  // Sample 7 (bytes 0x38..0x3f), then combine samples 6 and 7.
  1657. "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
  1658. "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
  1659. "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
  1660. "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
  1661. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1662. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1663. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1664. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1665. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1666. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1667. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1668. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1669. "psrlh %[src0], %[src0], %[one] \n\t"
  1670. "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
  1671. "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
  1672. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1673. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1674. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1675. "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
  1676. "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
  1677. "psubw %[dest3_u], %[src0], %[src1] \n\t"
  1678. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  1679. "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
  1680. "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
  1681. "psubw %[dest3_v], %[src1], %[src0] \n\t"
  1682. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  // Narrow the eight 32-bit U results to bytes and store them.
  1683. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  1684. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  1685. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  1686. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  1687. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  // Same for the eight V results.
  1688. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  1689. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  1690. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  1691. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  1692. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  // Advance by 16 input pixels / 8 output samples and loop while width > 0.
  1693. "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
  1694. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  1695. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  1696. "daddi %[width], %[width], -0x10 \n\t"
  1697. "bgtz %[width], 1b \n\t"
  // Outputs: scratch registers only (all early-clobber).
  1698. : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
  1699. [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
  1700. [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
  1701. [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
  1702. [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
  1703. [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
  1704. [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
  // Inputs: pointers, counter, weights and the shift counts / zero constant.
  1705. : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
  1706. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  1707. [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
  1708. [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
  1709. [sixteen] "f"(0x10)
  1710. : "memory");
  1711. }
  1712. void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  1713. uint64_t src, src_hi, src_lo;
  1714. uint64_t dest0, dest1, dest2, dest3;
  1715. const uint64_t value = 0x1080;
  1716. const uint64_t mask = 0x0001004200810019;
  1717. __asm__ volatile(
  1718. "1: \n\t"
  1719. "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
  1720. "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
  1721. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1722. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1723. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1724. "dsll %[src], %[src], %[eight] \n\t"
  1725. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1726. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1727. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1728. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1729. "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
  1730. "paddw %[dest0], %[dest0], %[src] \n\t"
  1731. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  1732. "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t"
  1733. "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t"
  1734. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1735. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1736. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1737. "dsll %[src], %[src], %[eight] \n\t"
  1738. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1739. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1740. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1741. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1742. "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
  1743. "paddw %[dest1], %[dest1], %[src] \n\t"
  1744. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  1745. "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t"
  1746. "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t"
  1747. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1748. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1749. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1750. "dsll %[src], %[src], %[eight] \n\t"
  1751. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1752. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1753. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1754. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1755. "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
  1756. "paddw %[dest2], %[dest2], %[src] \n\t"
  1757. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  1758. "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t"
  1759. "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t"
  1760. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  1761. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  1762. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  1763. "dsll %[src], %[src], %[eight] \n\t"
  1764. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  1765. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  1766. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  1767. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  1768. "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
  1769. "paddw %[dest3], %[dest3], %[src] \n\t"
  1770. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  1771. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  1772. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  1773. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  1774. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  1775. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  1776. "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t"
  1777. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  1778. "daddi %[width], %[width], -0x08 \n\t"
  1779. "bnez %[width], 1b \n\t"
  1780. : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
  1781. [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
  1782. [dest3] "=&f"(dest3)
  1783. : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
  1784. [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
  1785. [zero] "f"(0x00)
  1786. : "memory");
  1787. }
  1788. void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
  1789. int src_stride_rgb,
  1790. uint8_t* dst_u,
  1791. uint8_t* dst_v,
  1792. int width) {
  1793. uint64_t src_rgb1;
  1794. uint64_t ftmp[13];
  1795. uint64_t tmp[1];
  1796. const uint64_t value = 0x4040;
  1797. const uint64_t mask_u = 0x0013002500380002;
  1798. const uint64_t mask_v = 0x00020038002f0009;
  1799. __asm__ volatile(
  1800. "dli %[tmp0], 0x0001000100010001 \n\t"
  1801. "dmtc1 %[tmp0], %[ftmp12] \n\t"
  1802. "1: \n\t"
  1803. "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
  1804. "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
  1805. "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
  1806. "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
  1807. "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
  1808. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1809. "dsll %[src0], %[src0], %[eight] \n\t"
  1810. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1811. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1812. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1813. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1814. "dsll %[src1], %[src1], %[eight] \n\t"
  1815. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1816. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1817. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1818. "psrlh %[src0], %[src0], %[one] \n\t"
  1819. "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
  1820. "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
  1821. "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
  1822. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  1823. "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
  1824. "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t"
  1825. "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t"
  1826. "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
  1827. "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
  1828. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1829. "dsll %[src0], %[src0], %[eight] \n\t"
  1830. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1831. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1832. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1833. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1834. "dsll %[src1], %[src1], %[eight] \n\t"
  1835. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1836. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1837. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1838. "psrlh %[src0], %[src0], %[one] \n\t"
  1839. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  1840. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  1841. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  1842. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1843. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1844. "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
  1845. "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
  1846. "psubw %[dest0_u], %[src0], %[src1] \n\t"
  1847. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  1848. "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
  1849. "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
  1850. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  1851. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  1852. "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t"
  1853. "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t"
  1854. "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
  1855. "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
  1856. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1857. "dsll %[src0], %[src0], %[eight] \n\t"
  1858. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1859. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1860. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1861. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1862. "dsll %[src1], %[src1], %[eight] \n\t"
  1863. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1864. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1865. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1866. "psrlh %[src0], %[src0], %[one] \n\t"
  1867. "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
  1868. "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
  1869. "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
  1870. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  1871. "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
  1872. "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t"
  1873. "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t"
  1874. "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
  1875. "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
  1876. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1877. "dsll %[src0], %[src0], %[eight] \n\t"
  1878. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1879. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1880. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1881. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1882. "dsll %[src1], %[src1], %[eight] \n\t"
  1883. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1884. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1885. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1886. "psrlh %[src0], %[src0], %[one] \n\t"
  1887. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  1888. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  1889. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  1890. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1891. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1892. "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
  1893. "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
  1894. "psubw %[dest1_u], %[src0], %[src1] \n\t"
  1895. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  1896. "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
  1897. "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
  1898. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  1899. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  1900. "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
  1901. "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
  1902. "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
  1903. "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
  1904. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1905. "dsll %[src0], %[src0], %[eight] \n\t"
  1906. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1907. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1908. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1909. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1910. "dsll %[src1], %[src1], %[eight] \n\t"
  1911. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1912. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1913. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1914. "psrlh %[src0], %[src0], %[one] \n\t"
  1915. "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
  1916. "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
  1917. "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
  1918. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  1919. "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
  1920. "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t"
  1921. "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t"
  1922. "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
  1923. "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
  1924. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1925. "dsll %[src0], %[src0], %[eight] \n\t"
  1926. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1927. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1928. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1929. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1930. "dsll %[src1], %[src1], %[eight] \n\t"
  1931. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1932. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1933. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1934. "psrlh %[src0], %[src0], %[one] \n\t"
  1935. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  1936. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  1937. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  1938. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1939. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1940. "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
  1941. "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
  1942. "psubw %[dest2_u], %[src0], %[src1] \n\t"
  1943. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  1944. "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
  1945. "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
  1946. "psubw %[dest2_v], %[src1], %[src0] \n\t"
  1947. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  1948. "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t"
  1949. "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t"
  1950. "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
  1951. "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
  1952. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1953. "dsll %[src0], %[src0], %[eight] \n\t"
  1954. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1955. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1956. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1957. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1958. "dsll %[src1], %[src1], %[eight] \n\t"
  1959. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1960. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1961. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1962. "psrlh %[src0], %[src0], %[one] \n\t"
  1963. "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
  1964. "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
  1965. "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
  1966. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  1967. "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
  1968. "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t"
  1969. "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t"
  1970. "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
  1971. "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
  1972. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  1973. "dsll %[src0], %[src0], %[eight] \n\t"
  1974. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  1975. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  1976. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  1977. "paddh %[src0], %[src0], %[src_lo] \n\t"
  1978. "dsll %[src1], %[src1], %[eight] \n\t"
  1979. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  1980. "paddh %[src0], %[src0], %[src_hi] \n\t"
  1981. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  1982. "psrlh %[src0], %[src0], %[one] \n\t"
  1983. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  1984. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  1985. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  1986. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  1987. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  1988. "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
  1989. "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
  1990. "psubw %[dest3_u], %[src0], %[src1] \n\t"
  1991. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  1992. "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
  1993. "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
  1994. "psubw %[dest3_v], %[src1], %[src0] \n\t"
  1995. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  1996. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  1997. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  1998. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  1999. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  2000. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  2001. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  2002. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  2003. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  2004. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  2005. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  2006. "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t"
  2007. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  2008. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  2009. "daddi %[width], %[width], -0x10 \n\t"
  2010. "bgtz %[width], 1b \n\t"
  2011. : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
  2012. [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
  2013. [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
  2014. [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
  2015. [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
  2016. [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
  2017. [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
  2018. : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
  2019. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  2020. [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
  2021. [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
  2022. [sixteen] "f"(0x10)
  2023. : "memory");
  2024. }
  // Converts one row of 3-byte-per-pixel RAW pixels to 8-bit luma with
  // Loongson MMI instructions, 8 pixels (24 source bytes) per loop iteration.
  //
  // For a pixel whose bytes in memory order are b0, b1, b2 the stored value is
  //   (66 * b0 + 129 * b1 + 25 * b2 + 0x1080) >> 8
  // (weights are the halfwords of `mask`, low to high; the bias is `value`
  // multiplied by the top halfword 0x0001). These look like the BT.601
  // studio-range luma weights -- TODO confirm against the C reference row.
  //
  //   src_argb0  source row; despite the name it is walked 3 bytes per pixel.
  //   dst_y      destination row, 8 bytes written per iteration.
  //   width      pixel count. The loop body runs before the first test and
  //              only exits when width reaches exactly zero (bnez), so width
  //              must be a positive multiple of 8.
  //
  // The fourth load covers source offsets 0x12..0x19 while the pointer only
  // advances by 0x18, so 2 bytes past each 8-pixel group are read (they are
  // overwritten by pinsrh_3 or shifted out and do not affect the result).
  2025. void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  2026. uint64_t src, src_hi, src_lo;
  2027. uint64_t dest0, dest1, dest2, dest3;
  // Rounding/offset term, inserted into halfword 3 of every pixel vector.
  2028. const uint64_t value = 0x1080;
  // Per-halfword multipliers, low to high: 0x42, 0x81, 0x19, 0x01.
  2029. const uint64_t mask = 0x0001001900810042;
  2030. __asm__ volatile(
  2031. "1: \n\t"
  // Pixels 0-1: unaligned 8-byte load; the low 4 bytes widen to the first
  // pixel, then an 8-bit left shift moves bytes 3..5 into the high half so
  // the second pixel can be widened the same way.
  2032. "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
  2033. "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
  2034. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  2035. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  2036. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2037. "dsll %[src], %[src], %[eight] \n\t"
  2038. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  2039. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  2040. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  // Add the two partial sums of each pixel and scale down by 256.
  2041. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  2042. "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
  2043. "paddw %[dest0], %[dest0], %[src] \n\t"
  2044. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  // Pixels 2-3.
  2045. "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t"
  2046. "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t"
  2047. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  2048. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  2049. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2050. "dsll %[src], %[src], %[eight] \n\t"
  2051. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  2052. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  2053. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2054. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  2055. "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
  2056. "paddw %[dest1], %[dest1], %[src] \n\t"
  2057. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  // Pixels 4-5.
  2058. "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t"
  2059. "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t"
  2060. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  2061. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  2062. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2063. "dsll %[src], %[src], %[eight] \n\t"
  2064. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  2065. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  2066. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2067. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  2068. "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
  2069. "paddw %[dest2], %[dest2], %[src] \n\t"
  2070. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  // Pixels 6-7 (this load extends 2 bytes past the 24 bytes consumed).
  2071. "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t"
  2072. "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t"
  2073. "punpcklbh %[src_lo], %[src], %[zero] \n\t"
  2074. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  2075. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2076. "dsll %[src], %[src], %[eight] \n\t"
  2077. "punpckhbh %[src_hi], %[src], %[zero] \n\t"
  2078. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  2079. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2080. "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
  2081. "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
  2082. "paddw %[dest3], %[dest3], %[src] \n\t"
  2083. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  // Narrow the eight 32-bit results to bytes and store them unaligned.
  2084. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  2085. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  2086. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  2087. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  2088. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  // Advance: 24 source bytes, 8 destination bytes, 8 pixels.
  2089. "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t"
  2090. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  2091. "daddi %[width], %[width], -0x08 \n\t"
  2092. "bnez %[width], 1b \n\t"
  2093. : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
  2094. [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
  2095. [dest3] "=&f"(dest3)
  // NOTE(review): src_argb0, dst_y and width are declared as input-only
  // operands but are modified by the daddiu/daddi instructions above; GCC
  // documents that input-only operands must not be modified. Consider
  // making them read-write ("+r") operands -- confirm with the maintainers.
  2096. : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
  2097. [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
  2098. [zero] "f"(0x00)
  2099. : "memory");
  2100. }
  // Produces 2x2-subsampled U and V rows from two adjacent rows of
  // 3-byte-per-pixel RAW pixels using Loongson MMI. Each loop iteration
  // consumes 16 pixels (0x30 bytes) from each row and writes 8 U and 8 V bytes.
  //
  // For every 2x2 pixel block, with S0/S1/S2 being, per channel at byte
  // offsets 0/1/2, (sum of the four samples + 1) >> 1:
  //   U = (56 * S2 + 2 * 0x4040 - 19 * S0 - 37 * S1) >> 8
  //   V = (56 * S0 + 2 * 0x4040 - 47 * S1 -  9 * S2) >> 8
  // using an arithmetic shift, then saturating to an unsigned byte. The
  // multipliers are the halfwords of mask_u / mask_v.
  //
  //   src_rgb0        first source row.
  //   src_stride_rgb  byte offset added to src_rgb0 to reach the second row.
  //   dst_u, dst_v    destination rows, 8 bytes each per iteration.
  //   width           pixel count; decremented by 16 per iteration and the
  //                   loop repeats while it is > 0 (bgtz). The body always
  //                   handles a full 16-pixel group, including the last one.
  //
  // The final load of each iteration covers offsets 0x2a..0x31 in both rows
  // while the pointer advances by 0x30, so 2 bytes past each consumed group
  // are read per row.
  2101. void RAWToUVRow_MMI(const uint8_t* src_rgb0,
  2102. int src_stride_rgb,
  2103. uint8_t* dst_u,
  2104. uint8_t* dst_v,
  2105. int width) {
  // Holds the address of the second row; recomputed every iteration.
  2106. uint64_t src_rgb1;
  2107. uint64_t ftmp[13];
  2108. uint64_t tmp[1];
  // Bias halfword; multiplied by 2 through the masks below (2 * 0x4040).
  2109. const uint64_t value = 0x4040;
  // Halfwords low to high: 0x13, 0x25, 0x38, 0x02 (bias multiplier on top).
  2110. const uint64_t mask_u = 0x0002003800250013;
  // Halfwords low to high: 0x02 (bias multiplier), 0x38, 0x2f, 0x09.
  2111. const uint64_t mask_v = 0x0009002f00380002;
  2112. __asm__ volatile(
  // ftmp12 = 1 in every halfword: rounding term added before the >> 1.
  2113. "dli %[tmp0], 0x0001000100010001 \n\t"
  2114. "dmtc1 %[tmp0], %[ftmp12] \n\t"
  2115. "1: \n\t"
  2116. "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
  // Chroma sample 0: sum two horizontally adjacent pixels from both rows
  // (the 8-bit shift aligns the second pixel with the first), round, halve.
  2117. "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
  2118. "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
  2119. "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
  2120. "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
  2121. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2122. "dsll %[src0], %[src0], %[eight] \n\t"
  2123. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2124. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  2125. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  2126. "paddh %[src0], %[src0], %[src_lo] \n\t"
  2127. "dsll %[src1], %[src1], %[eight] \n\t"
  2128. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  2129. "paddh %[src0], %[src0], %[src_hi] \n\t"
  2130. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  2131. "psrlh %[src0], %[src0], %[one] \n\t"
  // U operand keeps the channels in place with the bias in halfword 3; the
  // V operand is shifted up one halfword with the bias in halfword 0.
  2132. "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
  2133. "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
  2134. "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
  2135. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  2136. "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
  // Chroma sample 1 (source offset 0x06), built in src_lo / src_hi.
  2137. "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t"
  2138. "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t"
  2139. "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
  2140. "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
  2141. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2142. "dsll %[src0], %[src0], %[eight] \n\t"
  2143. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2144. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  2145. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  2146. "paddh %[src0], %[src0], %[src_lo] \n\t"
  2147. "dsll %[src1], %[src1], %[eight] \n\t"
  2148. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  2149. "paddh %[src0], %[src0], %[src_hi] \n\t"
  2150. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  2151. "psrlh %[src0], %[src0], %[one] \n\t"
  2152. "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
  2153. "dsll %[src_hi], %[src0], %[sixteen] \n\t"
  2154. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  2155. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  2156. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  // Combine samples 0 and 1: U = high partial - low partial,
  // V = low partial - high partial, each followed by an arithmetic >> 8.
  2157. "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
  2158. "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
  2159. "psubw %[dest0_u], %[src1], %[src0] \n\t"
  2160. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  2161. "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
  2162. "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
  2163. "psubw %[dest0_v], %[src0], %[src1] \n\t"
  2164. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  // Chroma sample 2 (source offset 0x0c).
  2165. "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t"
  2166. "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t"
  2167. "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
  2168. "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
  2169. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2170. "dsll %[src0], %[src0], %[eight] \n\t"
  2171. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2172. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  2173. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  2174. "paddh %[src0], %[src0], %[src_lo] \n\t"
  2175. "dsll %[src1], %[src1], %[eight] \n\t"
  2176. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  2177. "paddh %[src0], %[src0], %[src_hi] \n\t"
  2178. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  2179. "psrlh %[src0], %[src0], %[one] \n\t"
  2180. "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
  2181. "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
  2182. "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
  2183. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  2184. "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
  // Chroma sample 3 (source offset 0x12).
  2185. "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t"
  2186. "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t"
  2187. "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
  2188. "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
  2189. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2190. "dsll %[src0], %[src0], %[eight] \n\t"
  2191. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2192. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  2193. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  2194. "paddh %[src0], %[src0], %[src_lo] \n\t"
  2195. "dsll %[src1], %[src1], %[eight] \n\t"
  2196. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  2197. "paddh %[src0], %[src0], %[src_hi] \n\t"
  2198. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  2199. "psrlh %[src0], %[src0], %[one] \n\t"
  2200. "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
  2201. "dsll %[src_hi], %[src0], %[sixteen] \n\t"
  2202. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  2203. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  2204. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  2205. "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
  2206. "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
  2207. "psubw %[dest1_u], %[src1], %[src0] \n\t"
  2208. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  2209. "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
  2210. "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
  2211. "psubw %[dest1_v], %[src0], %[src1] \n\t"
  2212. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  // Chroma sample 4 (source offset 0x18).
  2213. "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
  2214. "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
  2215. "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
  2216. "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
  2217. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2218. "dsll %[src0], %[src0], %[eight] \n\t"
  2219. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2220. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  2221. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  2222. "paddh %[src0], %[src0], %[src_lo] \n\t"
  2223. "dsll %[src1], %[src1], %[eight] \n\t"
  2224. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  2225. "paddh %[src0], %[src0], %[src_hi] \n\t"
  2226. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  2227. "psrlh %[src0], %[src0], %[one] \n\t"
  2228. "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
  2229. "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
  2230. "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
  2231. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  2232. "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
  // Chroma sample 5 (source offset 0x1e).
  2233. "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t"
  2234. "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t"
  2235. "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
  2236. "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
  2237. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2238. "dsll %[src0], %[src0], %[eight] \n\t"
  2239. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2240. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  2241. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  2242. "paddh %[src0], %[src0], %[src_lo] \n\t"
  2243. "dsll %[src1], %[src1], %[eight] \n\t"
  2244. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  2245. "paddh %[src0], %[src0], %[src_hi] \n\t"
  2246. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  2247. "psrlh %[src0], %[src0], %[one] \n\t"
  2248. "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
  2249. "dsll %[src_hi], %[src0], %[sixteen] \n\t"
  2250. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  2251. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  2252. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  2253. "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
  2254. "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
  2255. "psubw %[dest2_u], %[src1], %[src0] \n\t"
  2256. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  2257. "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
  2258. "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
  2259. "psubw %[dest2_v], %[src0], %[src1] \n\t"
  2260. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  // Chroma sample 6 (source offset 0x24).
  2261. "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t"
  2262. "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t"
  2263. "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
  2264. "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
  2265. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2266. "dsll %[src0], %[src0], %[eight] \n\t"
  2267. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2268. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  2269. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  2270. "paddh %[src0], %[src0], %[src_lo] \n\t"
  2271. "dsll %[src1], %[src1], %[eight] \n\t"
  2272. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  2273. "paddh %[src0], %[src0], %[src_hi] \n\t"
  2274. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  2275. "psrlh %[src0], %[src0], %[one] \n\t"
  2276. "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
  2277. "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
  2278. "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
  2279. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  2280. "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
  // Chroma sample 7 (source offset 0x2a; load ends 2 bytes past the group).
  2281. "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t"
  2282. "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t"
  2283. "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
  2284. "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
  2285. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2286. "dsll %[src0], %[src0], %[eight] \n\t"
  2287. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2288. "paddh %[src0], %[src_lo], %[src_hi] \n\t"
  2289. "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
  2290. "paddh %[src0], %[src0], %[src_lo] \n\t"
  2291. "dsll %[src1], %[src1], %[eight] \n\t"
  2292. "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
  2293. "paddh %[src0], %[src0], %[src_hi] \n\t"
  2294. "paddh %[src0], %[src0], %[ftmp12] \n\t"
  2295. "psrlh %[src0], %[src0], %[one] \n\t"
  2296. "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
  2297. "dsll %[src_hi], %[src0], %[sixteen] \n\t"
  2298. "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
  2299. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  2300. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  2301. "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
  2302. "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
  2303. "psubw %[dest3_u], %[src1], %[src0] \n\t"
  2304. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  2305. "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
  2306. "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
  2307. "psubw %[dest3_v], %[src0], %[src1] \n\t"
  2308. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  // Narrow the eight U results to bytes and store, then the same for V.
  2309. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  2310. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  2311. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  2312. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  2313. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  2314. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  2315. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  2316. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  2317. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  2318. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  // Advance: 48 source bytes, 8 bytes in each destination, 16 pixels.
  2319. "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t"
  2320. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  2321. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  2322. "daddi %[width], %[width], -0x10 \n\t"
  2323. "bgtz %[width], 1b \n\t"
  2324. : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
  2325. [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
  2326. [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
  2327. [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
  2328. [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
  2329. [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
  2330. [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
  // NOTE(review): src_rgb0, dst_u, dst_v and width are declared as
  // input-only operands but are modified by the daddiu/daddi instructions
  // above; GCC documents that input-only operands must not be modified.
  // Consider read-write ("+r") operands -- confirm with the maintainers.
  2331. : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
  2332. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  2333. [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
  2334. [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
  2335. [sixteen] "f"(0x10)
  2336. : "memory");
  2337. }
  // Converts one row of 4-byte-per-pixel pixels to 8-bit luma with Loongson
  // MMI instructions, 8 pixels (32 source bytes) per loop iteration.
  //
  // For a pixel whose bytes in memory order are b0, b1, b2, b3 the stored
  // value is
  //   (29 * b0 + 150 * b1 + 77 * b2 + 0x80) >> 8
  // b3 is replaced by the rounding constant before the multiply and does not
  // contribute. (For ARGB stored as B,G,R,A these would be the B, G and R
  // weights -- TODO confirm byte order against the C reference row.)
  //
  //   src_argb0  source row, 4 bytes per pixel.
  //   dst_y      destination row, 8 bytes written per iteration.
  //   width      pixel count. The loop body runs before the first test and
  //              only exits when width reaches exactly zero (bnez), so width
  //              must be a positive multiple of 8.
  2338. void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
  2339. uint64_t src, src_hi, src_lo;
  2340. uint64_t dest, dest0, dest1, dest2, dest3;
  2341. uint64_t tmp0, tmp1;
  2342. const uint64_t shift = 0x08;
  // Rounding term (0x80), inserted into halfword 3 of every pixel vector.
  2343. const uint64_t value = 0x80;
  // All-zero operand used to widen bytes to halfwords.
  2344. const uint64_t mask0 = 0x0;
  // Per-halfword multipliers, low to high: 0x1D, 0x96, 0x4D, 0x01.
  2345. const uint64_t mask1 = 0x0001004D0096001DULL;
  2346. __asm__ volatile(
  2347. "1: \n\t"
  // Pixels 0-1: load 8 bytes, widen each pixel to four halfwords, overwrite
  // halfword 3 with the rounding term, multiply-accumulate with mask1.
  2348. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  2349. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  2350. "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
  2351. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  2352. "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
  2353. "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
  2354. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  2355. "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
  // Add the two partial sums of each pixel and scale down by 256.
  2356. "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
  2357. "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
  2358. "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
  2359. "psrlw %[dest0], %[dest0], %[shift] \n\t"
  // Pixels 2-3.
  2360. "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
  2361. "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
  2362. "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
  2363. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  2364. "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
  2365. "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
  2366. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  2367. "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
  2368. "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
  2369. "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
  2370. "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
  2371. "psrlw %[dest1], %[dest1], %[shift] \n\t"
  // Pixels 4-5.
  2372. "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
  2373. "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
  2374. "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
  2375. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  2376. "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
  2377. "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
  2378. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  2379. "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
  2380. "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
  2381. "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
  2382. "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
  2383. "psrlw %[dest2], %[dest2], %[shift] \n\t"
  // Pixels 6-7.
  2384. "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
  2385. "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
  2386. "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
  2387. "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
  2388. "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
  2389. "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
  2390. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  2391. "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
  2392. "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
  2393. "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
  2394. "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
  2395. "psrlw %[dest3], %[dest3], %[shift] \n\t"
  // Narrow the eight 32-bit results to bytes and store them unaligned.
  2396. "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
  2397. "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
  2398. "packushb %[dest], %[tmp0], %[tmp1] \n\t"
  2399. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  2400. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  // Advance: 32 source bytes, 8 destination bytes, 8 pixels.
  2401. "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
  2402. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  2403. "daddi %[width], %[width], -0x08 \n\t"
  2404. "bnez %[width], 1b \n\t"
  2405. : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
  2406. [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
  2407. [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0),
  2408. [tmp1] "=&f"(tmp1)
  // NOTE(review): src_ptr, dst_ptr and width are declared as input-only
  // operands but are modified by the daddiu/daddi instructions above; GCC
  // documents that input-only operands must not be modified. Consider
  // read-write ("+r") operands -- confirm with the maintainers.
  2409. : [src_ptr] "r"(src_argb0), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0),
  2410. [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value),
  2411. [width] "r"(width)
  2412. : "memory");
  2413. }
  // Produces 2x2-subsampled U and V rows from two adjacent rows of
  // 4-byte-per-pixel pixels using Loongson MMI. Each loop iteration consumes
  // 16 pixels (0x40 bytes) from each row and writes 8 U and 8 V bytes.
  //
  // For every 2x2 pixel block, per channel at byte offsets 0/1/2, the two
  // vertical sums are formed and combined with pavgh (rounding average),
  // giving S0/S1/S2. Then
  //   U = (63 * S0 + 2 * 0x4040 - 42 * S1 - 21 * S2) >> 8
  //   V = (63 * S2 + 2 * 0x4040 - 10 * S0 - 53 * S1) >> 8
  // using an arithmetic shift, then saturating to an unsigned byte. The
  // multipliers are the halfwords of mask_u / mask_v; the byte at offset 3 of
  // each pixel is displaced by the bias and does not contribute.
  //
  //   src_rgb0        first source row.
  //   src_stride_rgb  byte offset added to src_rgb0 to reach the second row.
  //   dst_u, dst_v    destination rows, 8 bytes each per iteration.
  //   width           pixel count; decremented by 16 per iteration and the
  //                   loop repeats while it is > 0 (bgtz). The body always
  //                   handles a full 16-pixel group, including the last one.
  2414. void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
  2415. int src_stride_rgb,
  2416. uint8_t* dst_u,
  2417. uint8_t* dst_v,
  2418. int width) {
  // Holds the address of the second row; recomputed every iteration.
  2419. uint64_t src_rgb1;
  2420. uint64_t ftmp[12];
  // Bias halfword; multiplied by 2 through the masks below (2 * 0x4040).
  2421. const uint64_t value = 0x4040;
  // Halfwords low to high: 0x02 (bias multiplier), 0x3f, 0x2a, 0x15.
  2422. const uint64_t mask_u = 0x0015002a003f0002;
  // Halfwords low to high: 0x0a, 0x35, 0x3f, 0x02 (bias multiplier).
  2423. const uint64_t mask_v = 0x0002003f0035000a;
  2424. __asm__ volatile(
  2425. "1: \n\t"
  2426. "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
  // Chroma sample 0: widen two pixels from each row, add the rows, then
  // average the two columns.
  2427. "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
  2428. "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
  2429. "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
  2430. "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
  2431. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2432. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2433. "punpcklbh %[src0], %[src1], %[zero] \n\t"
  2434. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  2435. "paddh %[src0], %[src_lo], %[src0] \n\t"
  2436. "paddh %[src1], %[src_hi], %[src1] \n\t"
  2437. "pavgh %[src0], %[src0], %[src1] \n\t"
  // U operand is shifted up one halfword with the bias in halfword 0; the
  // V operand keeps the channels in place with the bias in halfword 3.
  2438. "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
  2439. "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
  2440. "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
  2441. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  2442. "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
  // Chroma sample 1 (source offset 0x08), built in src_lo / src_hi.
  2443. "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
  2444. "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
  2445. "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
  2446. "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
  2447. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2448. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2449. "punpcklbh %[src0], %[src1], %[zero] \n\t"
  2450. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  2451. "paddh %[src0], %[src_lo], %[src0] \n\t"
  2452. "paddh %[src1], %[src_hi], %[src1] \n\t"
  2453. "pavgh %[src0], %[src0], %[src1] \n\t"
  2454. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  2455. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  2456. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  2457. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  2458. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  // Combine samples 0 and 1: U = low partial - high partial,
  // V = high partial - low partial, each followed by an arithmetic >> 8.
  2459. "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
  2460. "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
  2461. "psubw %[dest0_u], %[src0], %[src1] \n\t"
  2462. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  2463. "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
  2464. "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
  2465. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  2466. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  // Chroma sample 2 (source offset 0x10).
  2467. "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
  2468. "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
  2469. "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
  2470. "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
  2471. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2472. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2473. "punpcklbh %[src0], %[src1], %[zero] \n\t"
  2474. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  2475. "paddh %[src0], %[src_lo], %[src0] \n\t"
  2476. "paddh %[src1], %[src_hi], %[src1] \n\t"
  2477. "pavgh %[src0], %[src0], %[src1] \n\t"
  2478. "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
  2479. "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
  2480. "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
  2481. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  2482. "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
  // Chroma sample 3 (source offset 0x18).
  2483. "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
  2484. "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
  2485. "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
  2486. "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
  2487. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2488. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2489. "punpcklbh %[src0], %[src1], %[zero] \n\t"
  2490. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  2491. "paddh %[src0], %[src_lo], %[src0] \n\t"
  2492. "paddh %[src1], %[src_hi], %[src1] \n\t"
  2493. "pavgh %[src0], %[src0], %[src1] \n\t"
  2494. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  2495. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  2496. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  2497. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  2498. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  2499. "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
  2500. "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
  2501. "psubw %[dest1_u], %[src0], %[src1] \n\t"
  2502. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  2503. "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
  2504. "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
  2505. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  2506. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  // Chroma sample 4 (source offset 0x20).
  2507. "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
  2508. "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
  2509. "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
  2510. "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
  2511. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2512. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2513. "punpcklbh %[src0], %[src1], %[zero] \n\t"
  2514. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  2515. "paddh %[src0], %[src_lo], %[src0] \n\t"
  2516. "paddh %[src1], %[src_hi], %[src1] \n\t"
  2517. "pavgh %[src0], %[src0], %[src1] \n\t"
  2518. "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
  2519. "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
  2520. "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
  2521. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  2522. "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
  // Chroma sample 5 (source offset 0x28).
  2523. "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
  2524. "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
  2525. "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
  2526. "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
  2527. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2528. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2529. "punpcklbh %[src0], %[src1], %[zero] \n\t"
  2530. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  2531. "paddh %[src0], %[src_lo], %[src0] \n\t"
  2532. "paddh %[src1], %[src_hi], %[src1] \n\t"
  2533. "pavgh %[src0], %[src0], %[src1] \n\t"
  2534. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  2535. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  2536. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  2537. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  2538. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  2539. "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
  2540. "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
  2541. "psubw %[dest2_u], %[src0], %[src1] \n\t"
  2542. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  2543. "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
  2544. "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
  2545. "psubw %[dest2_v], %[src1], %[src0] \n\t"
  2546. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  // Chroma sample 6 (source offset 0x30).
  2547. "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
  2548. "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
  2549. "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
  2550. "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
  2551. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2552. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2553. "punpcklbh %[src0], %[src1], %[zero] \n\t"
  2554. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  2555. "paddh %[src0], %[src_lo], %[src0] \n\t"
  2556. "paddh %[src1], %[src_hi], %[src1] \n\t"
  2557. "pavgh %[src0], %[src0], %[src1] \n\t"
  2558. "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
  2559. "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
  2560. "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
  2561. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  2562. "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
  // Chroma sample 7 (source offset 0x38).
  2563. "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
  2564. "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
  2565. "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
  2566. "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
  2567. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  2568. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  2569. "punpcklbh %[src0], %[src1], %[zero] \n\t"
  2570. "punpckhbh %[src1], %[src1], %[zero] \n\t"
  2571. "paddh %[src0], %[src_lo], %[src0] \n\t"
  2572. "paddh %[src1], %[src_hi], %[src1] \n\t"
  2573. "pavgh %[src0], %[src0], %[src1] \n\t"
  2574. "dsll %[src_lo], %[src0], %[sixteen] \n\t"
  2575. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  2576. "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
  2577. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  2578. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  2579. "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
  2580. "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
  2581. "psubw %[dest3_u], %[src0], %[src1] \n\t"
  2582. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  2583. "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
  2584. "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
  2585. "psubw %[dest3_v], %[src1], %[src0] \n\t"
  2586. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  // Narrow the eight U results to bytes and store, then the same for V.
  2587. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  2588. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  2589. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  2590. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  2591. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  2592. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  2593. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  2594. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  2595. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  2596. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  // Advance: 64 source bytes, 8 bytes in each destination, 16 pixels.
  2597. "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
  2598. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  2599. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  2600. "daddi %[width], %[width], -0x10 \n\t"
  2601. "bgtz %[width], 1b \n\t"
  2602. : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
  2603. [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
  2604. [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
  2605. [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
  2606. [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
  2607. [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
  // NOTE(review): src_rgb0, dst_u, dst_v and width are declared as
  // input-only operands but are modified by the daddiu/daddi instructions
  // above; GCC documents that input-only operands must not be modified.
  // Consider read-write ("+r") operands -- confirm with the maintainers.
  2608. : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
  2609. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  2610. [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
  2611. [zero] "f"(0x00), [eight] "f"(0x08),
  2612. [sixteen] "f"(0x10)
  2613. : "memory");
  2614. }
  // Converts a row of 16-bit RGB565 pixels to 8-bit luma with Loongson MMI
  // inline assembly, handling 8 pixels per loop iteration.
  //
  // src_rgb565: source row; 16 bytes are read per iteration.
  // dst_y:      destination row; 8 bytes are written per iteration.
  // width:      pixel count; decremented by 8 per iteration.
  //
  // mask holds the 16-bit weights 0x19, 0x81, 0x42 and 0x01 (low to high
  // halfword) and value holds 0x1080 in every halfword. Combined with the
  // pmaddhw / paddw / psrlw-by-8 sequence this looks like
  //   Y = (25 * B + 129 * G + 66 * R + 0x1080) >> 8
  // NOTE(review): formula inferred from the constants and instruction
  // names - confirm against the C reference implementation.
  //
  // NOTE(review): the loop is bottom-tested (bgtz after the body), so one
  // full 8-pixel group is always read and written; callers presumably pass a
  // positive multiple of 8 - verify.
  // NOTE(review): src_rgb565, dst_y and width are listed as input-only
  // operands but are modified by the daddiu instructions; GCC's extended-asm
  // rules expect modified operands to be declared in/out ("+r").
  2615. void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  // Scratch storage bound to the asm's floating-point temporaries.
  2616. uint64_t ftmp[11];
  2617. const uint64_t value = 0x1080108010801080;
  2618. const uint64_t mask = 0x0001004200810019;
  // Per-halfword field masks: low 5 bits, low byte, low 3 bits.
  2619. uint64_t c0 = 0x001f001f001f001f;
  2620. uint64_t c1 = 0x00ff00ff00ff00ff;
  2621. uint64_t c2 = 0x0007000700070007;
  2622. __asm__ volatile(
  2623. "1: \n\t"
  // First 8 source bytes (pixels 0-3).
  2624. "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
  2625. "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
  // Split each pixel: b = low 5 bits, g = 3 low bits of the high byte joined
  // with the top 3 bits of the low byte, r = top 5 bits.
  2626. "psrlh %[src1], %[src0], %[eight] \n\t"
  2627. "and %[b], %[src0], %[c0] \n\t"
  2628. "and %[src0], %[src0], %[c1] \n\t"
  2629. "psrlh %[src0], %[src0], %[five] \n\t"
  2630. "and %[g], %[src1], %[c2] \n\t"
  2631. "psllh %[g], %[g], %[three] \n\t"
  2632. "or %[g], %[src0], %[g] \n\t"
  2633. "psrlh %[r], %[src1], %[three] \n\t"
  // Widen to 8 bits: b and r as (x << 3) | (x >> 2), g as (x << 2) | (x >> 4).
  2634. "psllh %[src0], %[b], %[three] \n\t"
  2635. "psrlh %[src1], %[b], %[two] \n\t"
  2636. "or %[b], %[src0], %[src1] \n\t"
  2637. "psllh %[src0], %[g], %[two] \n\t"
  2638. "psrlh %[src1], %[g], %[four] \n\t"
  2639. "or %[g], %[src0], %[src1] \n\t"
  2640. "psllh %[src0], %[r], %[three] \n\t"
  2641. "psrlh %[src1], %[r], %[two] \n\t"
  2642. "or %[r], %[src0], %[src1] \n\t"
  // Low halves of b/r/g interleaved with value, weighted by mask -> dest0.
  2643. "punpcklhw %[src0], %[b], %[r] \n\t"
  2644. "punpcklhw %[src1], %[g], %[value] \n\t"
  2645. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2646. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2647. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2648. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2649. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2650. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2651. "paddw %[dest0], %[src0], %[src1] \n\t"
  2652. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  // Same sequence on the high halves -> dest1.
  2653. "punpckhhw %[src0], %[b], %[r] \n\t"
  2654. "punpckhhw %[src1], %[g], %[value] \n\t"
  2655. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2656. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2657. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2658. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2659. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2660. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2661. "paddw %[dest1], %[src0], %[src1] \n\t"
  2662. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  // Second 8 source bytes (pixels 4-7): identical processing -> dest2, dest3.
  2663. "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
  2664. "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
  2665. "psrlh %[src1], %[src0], %[eight] \n\t"
  2666. "and %[b], %[src0], %[c0] \n\t"
  2667. "and %[src0], %[src0], %[c1] \n\t"
  2668. "psrlh %[src0], %[src0], %[five] \n\t"
  2669. "and %[g], %[src1], %[c2] \n\t"
  2670. "psllh %[g], %[g], %[three] \n\t"
  2671. "or %[g], %[src0], %[g] \n\t"
  2672. "psrlh %[r], %[src1], %[three] \n\t"
  2673. "psllh %[src0], %[b], %[three] \n\t"
  2674. "psrlh %[src1], %[b], %[two] \n\t"
  2675. "or %[b], %[src0], %[src1] \n\t"
  2676. "psllh %[src0], %[g], %[two] \n\t"
  2677. "psrlh %[src1], %[g], %[four] \n\t"
  2678. "or %[g], %[src0], %[src1] \n\t"
  2679. "psllh %[src0], %[r], %[three] \n\t"
  2680. "psrlh %[src1], %[r], %[two] \n\t"
  2681. "or %[r], %[src0], %[src1] \n\t"
  2682. "punpcklhw %[src0], %[b], %[r] \n\t"
  2683. "punpcklhw %[src1], %[g], %[value] \n\t"
  2684. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2685. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2686. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2687. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2688. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2689. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2690. "paddw %[dest2], %[src0], %[src1] \n\t"
  2691. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  2692. "punpckhhw %[src0], %[b], %[r] \n\t"
  2693. "punpckhhw %[src1], %[g], %[value] \n\t"
  2694. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2695. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2696. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2697. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2698. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2699. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2700. "paddw %[dest3], %[src0], %[src1] \n\t"
  2701. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  // Narrow the four result registers to 8 bytes and store them to dst_y.
  2702. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  2703. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  2704. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  2705. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  2706. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  // Advance: 16 source bytes, 8 destination bytes, 8 pixels.
  2707. "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t"
  2708. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  2709. "daddiu %[width], %[width], -0x08 \n\t"
  2710. "bgtz %[width], 1b \n\t"
  2711. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
  2712. [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
  2713. [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
  2714. [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
  2715. : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value),
  2716. [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
  2717. [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05),
  2718. [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04)
  2719. : "memory");
  2720. }
  // Converts a row of 16-bit ARGB1555 pixels to 8-bit luma with Loongson MMI
  // inline assembly, handling 8 pixels per loop iteration. The top (alpha)
  // bit of each pixel is masked away and does not contribute.
  //
  // src_argb1555: source row; 16 bytes are read per iteration.
  // dst_y:        destination row; 8 bytes are written per iteration.
  // width:        pixel count; decremented by 8 per iteration.
  //
  // mask holds the 16-bit weights 0x19, 0x81, 0x42 and 0x01 (low to high
  // halfword) and value holds 0x1080 in every halfword. Combined with the
  // pmaddhw / paddw / psrlw-by-8 sequence this looks like
  //   Y = (25 * B + 129 * G + 66 * R + 0x1080) >> 8
  // NOTE(review): formula inferred from the constants and instruction
  // names - confirm against the C reference implementation.
  //
  // NOTE(review): the loop is bottom-tested (bgtz after the body), so one
  // full 8-pixel group is always read and written; callers presumably pass a
  // positive multiple of 8 - verify.
  // NOTE(review): src_argb1555, dst_y and width are listed as input-only
  // operands but are modified by the daddiu instructions; GCC's extended-asm
  // rules expect modified operands to be declared in/out ("+r").
  // NOTE(review): the [seven] input operand is bound below but never
  // referenced in the asm template.
  2721. void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555,
  2722. uint8_t* dst_y,
  2723. int width) {
  // Scratch storage bound to the asm's floating-point temporaries.
  2724. uint64_t ftmp[11];
  2725. const uint64_t value = 0x1080108010801080;
  2726. const uint64_t mask = 0x0001004200810019;
  // Per-halfword field masks: low 5 bits, low byte, low 2 bits, bits 2-6.
  2727. uint64_t c0 = 0x001f001f001f001f;
  2728. uint64_t c1 = 0x00ff00ff00ff00ff;
  2729. uint64_t c2 = 0x0003000300030003;
  2730. uint64_t c3 = 0x007c007c007c007c;
  2731. __asm__ volatile(
  2732. "1: \n\t"
  // First 8 source bytes (pixels 0-3).
  2733. "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
  2734. "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
  // Split each pixel: b = low 5 bits, g = 2 low bits of the high byte joined
  // with the top 3 bits of the low byte, r = bits 2-6 of the high byte.
  2735. "psrlh %[src1], %[src0], %[eight] \n\t"
  2736. "and %[b], %[src0], %[c0] \n\t"
  2737. "and %[src0], %[src0], %[c1] \n\t"
  2738. "psrlh %[src0], %[src0], %[five] \n\t"
  2739. "and %[g], %[src1], %[c2] \n\t"
  2740. "psllh %[g], %[g], %[three] \n\t"
  2741. "or %[g], %[src0], %[g] \n\t"
  2742. "and %[r], %[src1], %[c3] \n\t"
  2743. "psrlh %[r], %[r], %[two] \n\t"
  // Widen each 5-bit channel to 8 bits as (x << 3) | (x >> 2).
  2744. "psllh %[src0], %[b], %[three] \n\t"
  2745. "psrlh %[src1], %[b], %[two] \n\t"
  2746. "or %[b], %[src0], %[src1] \n\t"
  2747. "psllh %[src0], %[g], %[three] \n\t"
  2748. "psrlh %[src1], %[g], %[two] \n\t"
  2749. "or %[g], %[src0], %[src1] \n\t"
  2750. "psllh %[src0], %[r], %[three] \n\t"
  2751. "psrlh %[src1], %[r], %[two] \n\t"
  2752. "or %[r], %[src0], %[src1] \n\t"
  // Low halves of b/r/g interleaved with value, weighted by mask -> dest0.
  2753. "punpcklhw %[src0], %[b], %[r] \n\t"
  2754. "punpcklhw %[src1], %[g], %[value] \n\t"
  2755. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2756. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2757. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2758. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2759. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2760. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2761. "paddw %[dest0], %[src0], %[src1] \n\t"
  2762. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  // Same sequence on the high halves -> dest1.
  2763. "punpckhhw %[src0], %[b], %[r] \n\t"
  2764. "punpckhhw %[src1], %[g], %[value] \n\t"
  2765. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2766. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2767. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2768. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2769. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2770. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2771. "paddw %[dest1], %[src0], %[src1] \n\t"
  2772. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  // Second 8 source bytes (pixels 4-7): identical processing -> dest2, dest3.
  2773. "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
  2774. "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
  2775. "psrlh %[src1], %[src0], %[eight] \n\t"
  2776. "and %[b], %[src0], %[c0] \n\t"
  2777. "and %[src0], %[src0], %[c1] \n\t"
  2778. "psrlh %[src0], %[src0], %[five] \n\t"
  2779. "and %[g], %[src1], %[c2] \n\t"
  2780. "psllh %[g], %[g], %[three] \n\t"
  2781. "or %[g], %[src0], %[g] \n\t"
  2782. "and %[r], %[src1], %[c3] \n\t"
  2783. "psrlh %[r], %[r], %[two] \n\t"
  2784. "psllh %[src0], %[b], %[three] \n\t"
  2785. "psrlh %[src1], %[b], %[two] \n\t"
  2786. "or %[b], %[src0], %[src1] \n\t"
  2787. "psllh %[src0], %[g], %[three] \n\t"
  2788. "psrlh %[src1], %[g], %[two] \n\t"
  2789. "or %[g], %[src0], %[src1] \n\t"
  2790. "psllh %[src0], %[r], %[three] \n\t"
  2791. "psrlh %[src1], %[r], %[two] \n\t"
  2792. "or %[r], %[src0], %[src1] \n\t"
  2793. "punpcklhw %[src0], %[b], %[r] \n\t"
  2794. "punpcklhw %[src1], %[g], %[value] \n\t"
  2795. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2796. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2797. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2798. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2799. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2800. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2801. "paddw %[dest2], %[src0], %[src1] \n\t"
  2802. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  2803. "punpckhhw %[src0], %[b], %[r] \n\t"
  2804. "punpckhhw %[src1], %[g], %[value] \n\t"
  2805. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2806. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2807. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2808. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2809. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2810. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2811. "paddw %[dest3], %[src0], %[src1] \n\t"
  2812. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  // Narrow the four result registers to 8 bytes and store them to dst_y.
  2813. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  2814. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  2815. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  2816. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  2817. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  // Advance: 16 source bytes, 8 destination bytes, 8 pixels.
  2818. "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t"
  2819. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  2820. "daddiu %[width], %[width], -0x08 \n\t"
  2821. "bgtz %[width], 1b \n\t"
  2822. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
  2823. [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
  2824. [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
  2825. [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
  2826. : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y),
  2827. [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
  2828. [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08),
  2829. [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
  2830. : "memory");
  2831. }
  // Converts a row of 16-bit ARGB4444 pixels to 8-bit luma with Loongson MMI
  // inline assembly, handling 8 pixels per loop iteration. The top nibble of
  // each pixel (alpha) is masked away and does not contribute.
  //
  // src_argb4444: source row; 16 bytes are read per iteration.
  // dst_y:        destination row; 8 bytes are written per iteration.
  // width:        pixel count; decremented by 8 per iteration.
  //
  // mask holds the 16-bit weights 0x19, 0x81, 0x42 and 0x01 (low to high
  // halfword) and value holds 0x1080 in every halfword. Combined with the
  // pmaddhw / paddw / psrlw-by-8 sequence this looks like
  //   Y = (25 * B + 129 * G + 66 * R + 0x1080) >> 8
  // NOTE(review): formula inferred from the constants and instruction
  // names - confirm against the C reference implementation.
  //
  // NOTE(review): the loop is bottom-tested (bgtz after the body), so one
  // full 8-pixel group is always read and written; callers presumably pass a
  // positive multiple of 8 - verify.
  // NOTE(review): src_argb4444, dst_y and width are listed as input-only
  // operands but are modified by the daddiu instructions; GCC's extended-asm
  // rules expect modified operands to be declared in/out ("+r").
  2832. void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444,
  2833. uint8_t* dst_y,
  2834. int width) {
  // Scratch storage bound to the asm's floating-point temporaries.
  2835. uint64_t ftmp[11];
  2836. uint64_t value = 0x1080108010801080;
  2837. uint64_t mask = 0x0001004200810019;
  // Per-halfword field masks: low nibble and low byte.
  2838. uint64_t c0 = 0x000f000f000f000f;
  2839. uint64_t c1 = 0x00ff00ff00ff00ff;
  2840. __asm__ volatile(
  2841. "1: \n\t"
  // First 8 source bytes (pixels 0-3).
  2842. "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
  2843. "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
  // Split each pixel: b = bits 0-3, g = bits 4-7, r = bits 8-11.
  2844. "psrlh %[src1], %[src0], %[eight] \n\t"
  2845. "and %[b], %[src0], %[c0] \n\t"
  2846. "and %[src0], %[src0], %[c1] \n\t"
  2847. "psrlh %[g], %[src0], %[four] \n\t"
  2848. "and %[r], %[src1], %[c0] \n\t"
  // Widen each 4-bit channel to 8 bits as (x << 4) | x.
  2849. "psllh %[src0], %[b], %[four] \n\t"
  2850. "or %[b], %[src0], %[b] \n\t"
  2851. "psllh %[src0], %[g], %[four] \n\t"
  2852. "or %[g], %[src0], %[g] \n\t"
  2853. "psllh %[src0], %[r], %[four] \n\t"
  2854. "or %[r], %[src0], %[r] \n\t"
  // Low halves of b/r/g interleaved with value, weighted by mask -> dest0.
  2855. "punpcklhw %[src0], %[b], %[r] \n\t"
  2856. "punpcklhw %[src1], %[g], %[value] \n\t"
  2857. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2858. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2859. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2860. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2861. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2862. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2863. "paddw %[dest0], %[src0], %[src1] \n\t"
  2864. "psrlw %[dest0], %[dest0], %[eight] \n\t"
  // Same sequence on the high halves -> dest1.
  2865. "punpckhhw %[src0], %[b], %[r] \n\t"
  2866. "punpckhhw %[src1], %[g], %[value] \n\t"
  2867. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2868. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2869. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2870. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2871. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2872. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2873. "paddw %[dest1], %[src0], %[src1] \n\t"
  2874. "psrlw %[dest1], %[dest1], %[eight] \n\t"
  // Second 8 source bytes (pixels 4-7): identical processing -> dest2, dest3.
  2875. "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
  2876. "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
  2877. "psrlh %[src1], %[src0], %[eight] \n\t"
  2878. "and %[b], %[src0], %[c0] \n\t"
  2879. "and %[src0], %[src0], %[c1] \n\t"
  2880. "psrlh %[g], %[src0], %[four] \n\t"
  2881. "and %[r], %[src1], %[c0] \n\t"
  2882. "psllh %[src0], %[b], %[four] \n\t"
  2883. "or %[b], %[src0], %[b] \n\t"
  2884. "psllh %[src0], %[g], %[four] \n\t"
  2885. "or %[g], %[src0], %[g] \n\t"
  2886. "psllh %[src0], %[r], %[four] \n\t"
  2887. "or %[r], %[src0], %[r] \n\t"
  2888. "punpcklhw %[src0], %[b], %[r] \n\t"
  2889. "punpcklhw %[src1], %[g], %[value] \n\t"
  2890. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2891. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2892. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2893. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2894. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2895. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2896. "paddw %[dest2], %[src0], %[src1] \n\t"
  2897. "psrlw %[dest2], %[dest2], %[eight] \n\t"
  2898. "punpckhhw %[src0], %[b], %[r] \n\t"
  2899. "punpckhhw %[src1], %[g], %[value] \n\t"
  2900. "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
  2901. "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
  2902. "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
  2903. "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
  2904. "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
  2905. "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
  2906. "paddw %[dest3], %[src0], %[src1] \n\t"
  2907. "psrlw %[dest3], %[dest3], %[eight] \n\t"
  // Narrow the four result registers to 8 bytes and store them to dst_y.
  2908. "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
  2909. "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
  2910. "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
  2911. "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
  2912. "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
  // Advance: 16 source bytes, 8 destination bytes, 8 pixels.
  2913. "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t"
  2914. "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
  2915. "daddiu %[width], %[width], -0x08 \n\t"
  2916. "bgtz %[width], 1b \n\t"
  2917. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
  2918. [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
  2919. [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
  2920. [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
  2921. : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y),
  2922. [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
  2923. [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04)
  2924. : "memory");
  2925. }
  // Computes U and V rows from two adjacent rows of 16-bit RGB565 pixels with
  // Loongson MMI inline assembly. Each loop iteration consumes 16 pixels from
  // both rows and writes 8 bytes to dst_u and 8 bytes to dst_v.
  //
  // src_rgb565:        first source row; 32 bytes are read per iteration.
  // src_stride_rgb565: byte offset added to src_rgb565 to reach the second
  //                    row (the leading daddu turns the operand named
  //                    next_rgb565 from this stride into that row pointer).
  // dst_u, dst_v:      destination rows; each advances 8 bytes per iteration.
  // width:             pixel count; decremented by 16 per iteration.
  //
  // Channels of the two rows are added vertically, then adjacent columns are
  // added, so every output sample is built from a 2x2 sum. mask_u holds the
  // 16-bit weights 0x02, 0x70, 0x4a, 0x26 and mask_v holds 0x12, 0x5e, 0x70,
  // 0x02 (low to high halfword); value holds 0x2020 in every halfword and
  // mask (0x93) is the pshufh selector.
  // NOTE(review): this appears to implement
  //   U = (112 * B - 74 * G - 38 * R + 0x8080) >> 8
  //   V = (112 * R - 94 * G - 18 * B + 0x8080) >> 8
  // on the 2x2 sums - inferred from the constants and instruction names;
  // confirm against the C reference implementation.
  //
  // NOTE(review): the loop is bottom-tested (bgtz after the body), so one
  // full 16-pixel group is always processed; callers presumably pass a
  // positive multiple of 16 - verify.
  // NOTE(review): src_rgb565, next_rgb565, dst_u, dst_v and width are listed
  // as input-only operands but are modified inside the asm; GCC's
  // extended-asm rules expect modified operands to be declared in/out ("+r").
  2926. void RGB565ToUVRow_MMI(const uint8_t* src_rgb565,
  2927. int src_stride_rgb565,
  2928. uint8_t* dst_u,
  2929. uint8_t* dst_v,
  2930. int width) {
  // Scratch storage bound to the asm's floating-point temporaries.
  2931. uint64_t ftmp[13];
  2932. uint64_t value = 0x2020202020202020;
  2933. uint64_t mask_u = 0x0026004a00700002;
  2934. uint64_t mask_v = 0x00020070005e0012;
  2935. uint64_t mask = 0x93;
  // Per-halfword field masks: low 5 bits, low byte, low 3 bits.
  2936. uint64_t c0 = 0x001f001f001f001f;
  2937. uint64_t c1 = 0x00ff00ff00ff00ff;
  2938. uint64_t c2 = 0x0007000700070007;
  2939. __asm__ volatile(
  // next_rgb565 = src_rgb565 + stride: pointer to the second row.
  2940. "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t"
  2941. "1: \n\t"
  // Group 0: bytes 0x00-0x07 of both rows -> dest0_u / dest0_v.
  2942. "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
  2943. "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
  2944. "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t"
  2945. "gsldlc1 %[src1], 0x07(%[next_rgb565]) \n\t"
  // Unpack b/g/r of the first row into b0/g0/r0.
  2946. "psrlh %[dest0_u], %[src0], %[eight] \n\t"
  2947. "and %[b0], %[src0], %[c0] \n\t"
  2948. "and %[src0], %[src0], %[c1] \n\t"
  2949. "psrlh %[src0], %[src0], %[five] \n\t"
  2950. "and %[g0], %[dest0_u], %[c2] \n\t"
  2951. "psllh %[g0], %[g0], %[three] \n\t"
  2952. "or %[g0], %[src0], %[g0] \n\t"
  2953. "psrlh %[r0], %[dest0_u], %[three] \n\t"
  // Unpack b/g/r of the second row into dest0_u/dest0_v/src0 (used as temps).
  2954. "psrlh %[src0], %[src1], %[eight] \n\t"
  2955. "and %[dest0_u], %[src1], %[c0] \n\t"
  2956. "and %[src1], %[src1], %[c1] \n\t"
  2957. "psrlh %[src1], %[src1], %[five] \n\t"
  2958. "and %[dest0_v], %[src0], %[c2] \n\t"
  2959. "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
  2960. "or %[dest0_v], %[src1], %[dest0_v] \n\t"
  2961. "psrlh %[src0], %[src0], %[three] \n\t"
  // Vertical sums of the two rows.
  2962. "paddh %[b0], %[b0], %[dest0_u] \n\t"
  2963. "paddh %[g0], %[g0], %[dest0_v] \n\t"
  2964. "paddh %[r0], %[r0], %[src0] \n\t"
  // Horizontal pair sums of b and r, then rescaled as (x << 1) | (x >> 6).
  2965. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  2966. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  2967. "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
  2968. "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
  2969. "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
  2970. "psrlh %[b0], %[src0], %[six] \n\t"
  2971. "psllh %[r0], %[src0], %[one] \n\t"
  2972. "or %[b0], %[b0], %[r0] \n\t"
  // Horizontal pair sums of g, interleaved with value.
  2973. "punpcklhw %[src0], %[g0], %[value] \n\t"
  2974. "punpckhhw %[src1], %[g0], %[value] \n\t"
  2975. "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
  2976. "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
  2977. "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
  // Weighted sums with mask_v / mask_u, difference of the partial sums, >> 8.
  2978. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  2979. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  2980. "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
  2981. "pshufh %[dest0_u], %[src0], %[mask] \n\t"
  2982. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  2983. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  2984. "pshufh %[b0], %[src1], %[mask] \n\t"
  2985. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  2986. "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
  2987. "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
  2988. "psubw %[dest0_u], %[src0], %[src1] \n\t"
  2989. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  2990. "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
  2991. "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
  2992. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  2993. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  // Group 1: bytes 0x08-0x0f of both rows -> dest1_u / dest1_v (same steps).
  2994. "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
  2995. "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
  2996. "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t"
  2997. "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t"
  2998. "psrlh %[dest1_u], %[src0], %[eight] \n\t"
  2999. "and %[b0], %[src0], %[c0] \n\t"
  3000. "and %[src0], %[src0], %[c1] \n\t"
  3001. "psrlh %[src0], %[src0], %[five] \n\t"
  3002. "and %[g0], %[dest1_u], %[c2] \n\t"
  3003. "psllh %[g0], %[g0], %[three] \n\t"
  3004. "or %[g0], %[src0], %[g0] \n\t"
  3005. "psrlh %[r0], %[dest1_u], %[three] \n\t"
  3006. "psrlh %[src0], %[src1], %[eight] \n\t"
  3007. "and %[dest1_u], %[src1], %[c0] \n\t"
  3008. "and %[src1], %[src1], %[c1] \n\t"
  3009. "psrlh %[src1], %[src1], %[five] \n\t"
  3010. "and %[dest1_v], %[src0], %[c2] \n\t"
  3011. "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
  3012. "or %[dest1_v], %[src1], %[dest1_v] \n\t"
  3013. "psrlh %[src0], %[src0], %[three] \n\t"
  3014. "paddh %[b0], %[b0], %[dest1_u] \n\t"
  3015. "paddh %[g0], %[g0], %[dest1_v] \n\t"
  3016. "paddh %[r0], %[r0], %[src0] \n\t"
  3017. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3018. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3019. "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
  3020. "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
  3021. "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
  3022. "psrlh %[b0], %[src0], %[six] \n\t"
  3023. "psllh %[r0], %[src0], %[one] \n\t"
  3024. "or %[b0], %[b0], %[r0] \n\t"
  3025. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3026. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3027. "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
  3028. "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
  3029. "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
  3030. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3031. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3032. "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
  3033. "pshufh %[dest1_u], %[src0], %[mask] \n\t"
  3034. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  3035. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3036. "pshufh %[b0], %[src1], %[mask] \n\t"
  3037. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3038. "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
  3039. "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
  3040. "psubw %[dest1_u], %[src0], %[src1] \n\t"
  3041. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  3042. "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
  3043. "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
  3044. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  3045. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  // Group 2: bytes 0x10-0x17 of both rows -> dest2_u / dest2_v (same steps).
  3046. "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t"
  3047. "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t"
  3048. "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t"
  3049. "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t"
  3050. "psrlh %[dest2_u], %[src0], %[eight] \n\t"
  3051. "and %[b0], %[src0], %[c0] \n\t"
  3052. "and %[src0], %[src0], %[c1] \n\t"
  3053. "psrlh %[src0], %[src0], %[five] \n\t"
  3054. "and %[g0], %[dest2_u], %[c2] \n\t"
  3055. "psllh %[g0], %[g0], %[three] \n\t"
  3056. "or %[g0], %[src0], %[g0] \n\t"
  3057. "psrlh %[r0], %[dest2_u], %[three] \n\t"
  3058. "psrlh %[src0], %[src1], %[eight] \n\t"
  3059. "and %[dest2_u], %[src1], %[c0] \n\t"
  3060. "and %[src1], %[src1], %[c1] \n\t"
  3061. "psrlh %[src1], %[src1], %[five] \n\t"
  3062. "and %[dest2_v], %[src0], %[c2] \n\t"
  3063. "psllh %[dest2_v], %[dest2_v], %[three] \n\t"
  3064. "or %[dest2_v], %[src1], %[dest2_v] \n\t"
  3065. "psrlh %[src0], %[src0], %[three] \n\t"
  3066. "paddh %[b0], %[b0], %[dest2_u] \n\t"
  3067. "paddh %[g0], %[g0], %[dest2_v] \n\t"
  3068. "paddh %[r0], %[r0], %[src0] \n\t"
  3069. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3070. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3071. "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
  3072. "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
  3073. "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
  3074. "psrlh %[b0], %[src0], %[six] \n\t"
  3075. "psllh %[r0], %[src0], %[one] \n\t"
  3076. "or %[b0], %[b0], %[r0] \n\t"
  3077. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3078. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3079. "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
  3080. "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
  3081. "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
  3082. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3083. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3084. "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
  3085. "pshufh %[dest2_u], %[src0], %[mask] \n\t"
  3086. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  3087. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3088. "pshufh %[b0], %[src1], %[mask] \n\t"
  3089. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3090. "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
  3091. "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
  3092. "psubw %[dest2_u], %[src0], %[src1] \n\t"
  3093. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  3094. "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
  3095. "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
  3096. "psubw %[dest2_v], %[src1], %[src0] \n\t"
  3097. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  // Group 3: bytes 0x18-0x1f of both rows -> dest3_u / dest3_v (same steps).
  3098. "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t"
  3099. "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t"
  3100. "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t"
  3101. "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t"
  3102. "psrlh %[dest3_u], %[src0], %[eight] \n\t"
  3103. "and %[b0], %[src0], %[c0] \n\t"
  3104. "and %[src0], %[src0], %[c1] \n\t"
  3105. "psrlh %[src0], %[src0], %[five] \n\t"
  3106. "and %[g0], %[dest3_u], %[c2] \n\t"
  3107. "psllh %[g0], %[g0], %[three] \n\t"
  3108. "or %[g0], %[src0], %[g0] \n\t"
  3109. "psrlh %[r0], %[dest3_u], %[three] \n\t"
  3110. "psrlh %[src0], %[src1], %[eight] \n\t"
  3111. "and %[dest3_u], %[src1], %[c0] \n\t"
  3112. "and %[src1], %[src1], %[c1] \n\t"
  3113. "psrlh %[src1], %[src1], %[five] \n\t"
  3114. "and %[dest3_v], %[src0], %[c2] \n\t"
  3115. "psllh %[dest3_v], %[dest3_v], %[three] \n\t"
  3116. "or %[dest3_v], %[src1], %[dest3_v] \n\t"
  3117. "psrlh %[src0], %[src0], %[three] \n\t"
  3118. "paddh %[b0], %[b0], %[dest3_u] \n\t"
  3119. "paddh %[g0], %[g0], %[dest3_v] \n\t"
  3120. "paddh %[r0], %[r0], %[src0] \n\t"
  3121. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3122. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3123. "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
  3124. "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
  3125. "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
  3126. "psrlh %[b0], %[src0], %[six] \n\t"
  3127. "psllh %[r0], %[src0], %[one] \n\t"
  3128. "or %[b0], %[b0], %[r0] \n\t"
  3129. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3130. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3131. "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
  3132. "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
  3133. "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
  3134. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3135. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3136. "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
  3137. "pshufh %[dest3_u], %[src0], %[mask] \n\t"
  3138. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  3139. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3140. "pshufh %[b0], %[src1], %[mask] \n\t"
  3141. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3142. "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
  3143. "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
  3144. "psubw %[dest3_u], %[src0], %[src1] \n\t"
  3145. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  3146. "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
  3147. "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
  3148. "psubw %[dest3_v], %[src1], %[src0] \n\t"
  3149. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  // Narrow the four U registers to 8 bytes and store them to dst_u.
  3150. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  3151. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  3152. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  3153. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  3154. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  // Narrow the four V registers to 8 bytes and store them to dst_v.
  3155. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  3156. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  3157. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  3158. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  3159. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  // Advance: 32 bytes in each source row, 8 bytes in each output, 16 pixels.
  3160. "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t"
  3161. "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t"
  3162. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  3163. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  3164. "daddiu %[width], %[width], -0x10 \n\t"
  3165. "bgtz %[width], 1b \n\t"
  3166. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
  3167. [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
  3168. [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
  3169. [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
  3170. [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
  3171. [dest3_v] "=&f"(ftmp[12])
  3172. : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565),
  3173. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  3174. [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
  3175. [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
  3176. [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
  3177. [one] "f"(0x01)
  3178. : "memory");
  3179. }
  3180. void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555,
  3181. int src_stride_argb1555,
  3182. uint8_t* dst_u,
  3183. uint8_t* dst_v,
  3184. int width) {
  3185. uint64_t ftmp[11];
  3186. uint64_t value = 0x2020202020202020;
  3187. uint64_t mask_u = 0x0026004a00700002;
  3188. uint64_t mask_v = 0x00020070005e0012;
  3189. uint64_t mask = 0x93;
  3190. uint64_t c0 = 0x001f001f001f001f;
  3191. uint64_t c1 = 0x00ff00ff00ff00ff;
  3192. uint64_t c2 = 0x0003000300030003;
  3193. uint64_t c3 = 0x007c007c007c007c;
  3194. __asm__ volatile(
  3195. "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t"
  3196. "1: \n\t"
  3197. "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
  3198. "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
  3199. "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t"
  3200. "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t"
  3201. "psrlh %[dest0_u], %[src0], %[eight] \n\t"
  3202. "and %[b0], %[src0], %[c0] \n\t"
  3203. "and %[src0], %[src0], %[c1] \n\t"
  3204. "psrlh %[src0], %[src0], %[five] \n\t"
  3205. "and %[g0], %[dest0_u], %[c2] \n\t"
  3206. "psllh %[g0], %[g0], %[three] \n\t"
  3207. "or %[g0], %[src0], %[g0] \n\t"
  3208. "and %[r0], %[dest0_u], %[c3] \n\t"
  3209. "psrlh %[r0], %[r0], %[two] \n\t"
  3210. "psrlh %[src0], %[src1], %[eight] \n\t"
  3211. "and %[dest0_u], %[src1], %[c0] \n\t"
  3212. "and %[src1], %[src1], %[c1] \n\t"
  3213. "psrlh %[src1], %[src1], %[five] \n\t"
  3214. "and %[dest0_v], %[src0], %[c2] \n\t"
  3215. "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
  3216. "or %[dest0_v], %[src1], %[dest0_v] \n\t"
  3217. "and %[src0], %[src0], %[c3] \n\t"
  3218. "psrlh %[src0], %[src0], %[two] \n\t"
  3219. "paddh %[b0], %[b0], %[dest0_u] \n\t"
  3220. "paddh %[g0], %[g0], %[dest0_v] \n\t"
  3221. "paddh %[r0], %[r0], %[src0] \n\t"
  3222. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3223. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3224. "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
  3225. "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
  3226. "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
  3227. "psrlh %[b0], %[src0], %[six] \n\t"
  3228. "psllh %[r0], %[src0], %[one] \n\t"
  3229. "or %[b0], %[b0], %[r0] \n\t"
  3230. "psrlh %[r0], %[g0], %[six] \n\t"
  3231. "psllh %[g0], %[g0], %[one] \n\t"
  3232. "or %[g0], %[g0], %[r0] \n\t"
  3233. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3234. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3235. "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
  3236. "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
  3237. "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
  3238. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3239. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3240. "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
  3241. "pshufh %[dest0_u], %[src0], %[mask] \n\t"
  3242. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  3243. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3244. "pshufh %[b0], %[src1], %[mask] \n\t"
  3245. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3246. "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
  3247. "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
  3248. "psubw %[dest0_u], %[src0], %[src1] \n\t"
  3249. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  3250. "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
  3251. "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
  3252. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  3253. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  3254. "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
  3255. "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
  3256. "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t"
  3257. "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t"
  3258. "psrlh %[dest1_u], %[src0], %[eight] \n\t"
  3259. "and %[b0], %[src0], %[c0] \n\t"
  3260. "and %[src0], %[src0], %[c1] \n\t"
  3261. "psrlh %[src0], %[src0], %[five] \n\t"
  3262. "and %[g0], %[dest1_u], %[c2] \n\t"
  3263. "psllh %[g0], %[g0], %[three] \n\t"
  3264. "or %[g0], %[src0], %[g0] \n\t"
  3265. "and %[r0], %[dest1_u], %[c3] \n\t"
  3266. "psrlh %[r0], %[r0], %[two] \n\t"
  3267. "psrlh %[src0], %[src1], %[eight] \n\t"
  3268. "and %[dest1_u], %[src1], %[c0] \n\t"
  3269. "and %[src1], %[src1], %[c1] \n\t"
  3270. "psrlh %[src1], %[src1], %[five] \n\t"
  3271. "and %[dest1_v], %[src0], %[c2] \n\t"
  3272. "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
  3273. "or %[dest1_v], %[src1], %[dest1_v] \n\t"
  3274. "and %[src0], %[src0], %[c3] \n\t"
  3275. "psrlh %[src0], %[src0], %[two] \n\t"
  3276. "paddh %[b0], %[b0], %[dest1_u] \n\t"
  3277. "paddh %[g0], %[g0], %[dest1_v] \n\t"
  3278. "paddh %[r0], %[r0], %[src0] \n\t"
  3279. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3280. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3281. "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
  3282. "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
  3283. "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
  3284. "psrlh %[b0], %[src0], %[six] \n\t"
  3285. "psllh %[r0], %[src0], %[one] \n\t"
  3286. "or %[b0], %[b0], %[r0] \n\t"
  3287. "psrlh %[r0], %[g0], %[six] \n\t"
  3288. "psllh %[g0], %[g0], %[one] \n\t"
  3289. "or %[g0], %[g0], %[r0] \n\t"
  3290. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3291. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3292. "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
  3293. "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
  3294. "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
  3295. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3296. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3297. "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
  3298. "pshufh %[dest1_u], %[src0], %[mask] \n\t"
  3299. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  3300. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3301. "pshufh %[b0], %[src1], %[mask] \n\t"
  3302. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3303. "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
  3304. "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
  3305. "psubw %[dest1_u], %[src0], %[src1] \n\t"
  3306. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  3307. "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
  3308. "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
  3309. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  3310. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  3311. "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t"
  3312. "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t"
  3313. "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t"
  3314. "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t"
  3315. "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t"
  3316. "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t"
  3317. "psrlh %[dest2_u], %[src0], %[eight] \n\t"
  3318. "and %[b0], %[src0], %[c0] \n\t"
  3319. "and %[src0], %[src0], %[c1] \n\t"
  3320. "psrlh %[src0], %[src0], %[five] \n\t"
  3321. "and %[g0], %[dest2_u], %[c2] \n\t"
  3322. "psllh %[g0], %[g0], %[three] \n\t"
  3323. "or %[g0], %[src0], %[g0] \n\t"
  3324. "and %[r0], %[dest2_u], %[c3] \n\t"
  3325. "psrlh %[r0], %[r0], %[two] \n\t"
  3326. "psrlh %[src0], %[src1], %[eight] \n\t"
  3327. "and %[dest2_u], %[src1], %[c0] \n\t"
  3328. "and %[src1], %[src1], %[c1] \n\t"
  3329. "psrlh %[src1], %[src1], %[five] \n\t"
  3330. "and %[dest0_v], %[src0], %[c2] \n\t"
  3331. "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
  3332. "or %[dest0_v], %[src1], %[dest0_v] \n\t"
  3333. "and %[src0], %[src0], %[c3] \n\t"
  3334. "psrlh %[src0], %[src0], %[two] \n\t"
  3335. "paddh %[b0], %[b0], %[dest2_u] \n\t"
  3336. "paddh %[g0], %[g0], %[dest0_v] \n\t"
  3337. "paddh %[r0], %[r0], %[src0] \n\t"
  3338. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3339. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3340. "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
  3341. "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
  3342. "paddh %[src0], %[dest2_u], %[dest0_v] \n\t"
  3343. "psrlh %[b0], %[src0], %[six] \n\t"
  3344. "psllh %[r0], %[src0], %[one] \n\t"
  3345. "or %[b0], %[b0], %[r0] \n\t"
  3346. "psrlh %[r0], %[g0], %[six] \n\t"
  3347. "psllh %[g0], %[g0], %[one] \n\t"
  3348. "or %[g0], %[g0], %[r0] \n\t"
  3349. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3350. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3351. "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
  3352. "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
  3353. "paddh %[g0], %[dest2_u], %[dest0_v] \n\t"
  3354. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3355. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3356. "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
  3357. "pshufh %[dest2_u], %[src0], %[mask] \n\t"
  3358. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  3359. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3360. "pshufh %[b0], %[src1], %[mask] \n\t"
  3361. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3362. "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
  3363. "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
  3364. "psubw %[dest2_u], %[src0], %[src1] \n\t"
  3365. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  3366. "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
  3367. "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
  3368. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  3369. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  3370. "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t"
  3371. "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t"
  3372. "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t"
  3373. "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t"
  3374. "psrlh %[dest3_u], %[src0], %[eight] \n\t"
  3375. "and %[b0], %[src0], %[c0] \n\t"
  3376. "and %[src0], %[src0], %[c1] \n\t"
  3377. "psrlh %[src0], %[src0], %[five] \n\t"
  3378. "and %[g0], %[dest3_u], %[c2] \n\t"
  3379. "psllh %[g0], %[g0], %[three] \n\t"
  3380. "or %[g0], %[src0], %[g0] \n\t"
  3381. "and %[r0], %[dest3_u], %[c3] \n\t"
  3382. "psrlh %[r0], %[r0], %[two] \n\t"
  3383. "psrlh %[src0], %[src1], %[eight] \n\t"
  3384. "and %[dest3_u], %[src1], %[c0] \n\t"
  3385. "and %[src1], %[src1], %[c1] \n\t"
  3386. "psrlh %[src1], %[src1], %[five] \n\t"
  3387. "and %[dest1_v], %[src0], %[c2] \n\t"
  3388. "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
  3389. "or %[dest1_v], %[src1], %[dest1_v] \n\t"
  3390. "and %[src0], %[src0], %[c3] \n\t"
  3391. "psrlh %[src0], %[src0], %[two] \n\t"
  3392. "paddh %[b0], %[b0], %[dest3_u] \n\t"
  3393. "paddh %[g0], %[g0], %[dest1_v] \n\t"
  3394. "paddh %[r0], %[r0], %[src0] \n\t"
  3395. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3396. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3397. "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
  3398. "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
  3399. "paddh %[src0], %[dest3_u], %[dest1_v] \n\t"
  3400. "psrlh %[b0], %[src0], %[six] \n\t"
  3401. "psllh %[r0], %[src0], %[one] \n\t"
  3402. "or %[b0], %[b0], %[r0] \n\t"
  3403. "psrlh %[r0], %[g0], %[six] \n\t"
  3404. "psllh %[g0], %[g0], %[one] \n\t"
  3405. "or %[g0], %[g0], %[r0] \n\t"
  3406. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3407. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3408. "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
  3409. "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
  3410. "paddh %[g0], %[dest3_u], %[dest1_v] \n\t"
  3411. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3412. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3413. "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
  3414. "pshufh %[dest3_u], %[src0], %[mask] \n\t"
  3415. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  3416. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3417. "pshufh %[b0], %[src1], %[mask] \n\t"
  3418. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3419. "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
  3420. "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
  3421. "psubw %[dest3_u], %[src0], %[src1] \n\t"
  3422. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  3423. "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
  3424. "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
  3425. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  3426. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  3427. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  3428. "packushb %[dest0_u], %[dest0_u], %[src1] \n\t"
  3429. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  3430. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  3431. "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t"
  3432. "packushb %[dest0_v], %[dest1_u], %[src1] \n\t"
  3433. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  3434. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  3435. "daddiu %[src_argb1555], %[src_argb1555], 0x20 \n\t"
  3436. "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t"
  3437. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  3438. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  3439. "daddiu %[width], %[width], -0x10 \n\t"
  3440. "bgtz %[width], 1b \n\t"
  3441. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
  3442. [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
  3443. [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
  3444. [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
  3445. [dest1_v] "=&f"(ftmp[10])
  3446. : [src_argb1555] "r"(src_argb1555),
  3447. [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u),
  3448. [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
  3449. [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3),
  3450. [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
  3451. [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
  3452. [two] "f"(0x02), [one] "f"(0x01)
  3453. : "memory");
  3454. }
  // Builds one row of U bytes and one row of V bytes from two rows of
  // ARGB4444 pixels, producing one U/V pair per 2x2 block of source pixels.
  //   src_argb4444        - first source row, 16 bits per pixel.
  //   src_stride_argb4444 - byte offset added to src_argb4444 to reach the
  //                         second source row.
  //   dst_u, dst_v        - output rows; 8 bytes are stored to each per pass.
  //   width               - source pixels per row; reduced by 16 per pass, and
  //                         the loop repeats while the remainder is > 0.
  // The loop body always runs at least once and always handles a full 16
  // pixels, so callers presumably pass a positive multiple of 16 (or padded
  // buffers) - TODO confirm against the call sites.
  //
  // For each 2x2 block the 4-bit fields are summed (B = bits 0-3, G = bits
  // 4-7, R = bits 8-11 of each 16-bit pixel; bits 12-15 are dropped), the sum
  // is widened to 8-bit range with (s << 2) | (s >> 4), and then:
  //   U = (0x8080 + 112 * B - 74 * G - 38 * R) >> 8
  //   V = (0x8080 + 112 * R - 94 * G - 18 * B) >> 8
  // Results are narrowed with signed (packsswh) then unsigned (packushb)
  // saturation before being stored.
  3455. void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444,
  3456. int src_stride_argb4444,
  3457. uint8_t* dst_u,
  3458. uint8_t* dst_v,
  3459. int width) {
  3460. uint64_t ftmp[13];
  // 0x2020 per halfword: two copies are added (0x4040) and then multiplied by
  // the weight 2 in mask_u / mask_v, which yields the 0x8080 bias.
  3461. uint64_t value = 0x2020202020202020;
  // Halfword weights, low to high: {2, 112, 74, 38} for U.
  3462. uint64_t mask_u = 0x0026004a00700002;
  // Halfword weights, low to high: {18, 94, 112, 2} for V.
  3463. uint64_t mask_v = 0x00020070005e0012;
  // pshufh selector; as used below it moves the bias halfword from lane 3 to
  // lane 0 so it lines up with the weight 2 in mask_u.
  3464. uint64_t mask = 0x93;
  // c0 keeps the low nibble of each halfword, c1 keeps the low byte.
  3465. uint64_t c0 = 0x000f000f000f000f;
  3466. uint64_t c1 = 0x00ff00ff00ff00ff;
  3467. __asm__ volatile(
  // Turn the stride operand into a pointer to the second source row.
  3468. "daddu %[next_argb4444], %[src_argb4444], %[next_argb4444] \n\t"
  3469. "1: \n\t"
  // Group 0: pixels 0-3 of both rows -> 2 U and 2 V values in 32-bit lanes.
  3470. "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
  3471. "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
  3472. "gsldrc1 %[src1], 0x00(%[next_argb4444]) \n\t"
  3473. "gsldlc1 %[src1], 0x07(%[next_argb4444]) \n\t"
  // Split the first row into 4-bit B, G and R fields (one per halfword).
  3474. "psrlh %[dest0_u], %[src0], %[eight] \n\t"
  3475. "and %[b0], %[src0], %[c0] \n\t"
  3476. "and %[src0], %[src0], %[c1] \n\t"
  3477. "psrlh %[g0], %[src0], %[four] \n\t"
  3478. "and %[r0], %[dest0_u], %[c0] \n\t"
  // Same split for the second row, using src0/dest0_u/dest0_v as scratch.
  3479. "psrlh %[src0], %[src1], %[eight] \n\t"
  3480. "and %[dest0_u], %[src1], %[c0] \n\t"
  3481. "and %[src1], %[src1], %[c1] \n\t"
  3482. "psrlh %[dest0_v], %[src1], %[four] \n\t"
  3483. "and %[src0], %[src0], %[c0] \n\t"
  // Vertical sums: row 0 + row 1 for each of the four columns.
  3484. "paddh %[b0], %[b0], %[dest0_u] \n\t"
  3485. "paddh %[g0], %[g0], %[dest0_v] \n\t"
  3486. "paddh %[r0], %[r0], %[src0] \n\t"
  // Horizontal pair sums of B and R, giving {B01, R01, B23, R23}.
  3487. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3488. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3489. "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
  3490. "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
  3491. "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
  // Widen the B/R sums: (s << 2) | (s >> 4).
  3492. "psrlh %[b0], %[src0], %[four] \n\t"
  3493. "psllh %[r0], %[src0], %[two] \n\t"
  3494. "or %[b0], %[b0], %[r0] \n\t"
  // G is widened per column first, then pair-summed together with the bias.
  3495. "psrlh %[r0], %[g0], %[four] \n\t"
  3496. "psllh %[g0], %[g0], %[two] \n\t"
  3497. "or %[g0], %[g0], %[r0] \n\t"
  3498. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3499. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3500. "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
  3501. "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
  3502. "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
  // src0 / src1 = {B, G, R, 0x4040} for the first / second 2x2 block.
  3503. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3504. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  // Weighted pair sums for V (direct) and U (after the pshufh reorder).
  3505. "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
  3506. "pshufh %[dest0_u], %[src0], %[mask] \n\t"
  3507. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  3508. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3509. "pshufh %[b0], %[src1], %[mask] \n\t"
  3510. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  // U = (low word - high word) >> 8 for both blocks.
  3511. "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
  3512. "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
  3513. "psubw %[dest0_u], %[src0], %[src1] \n\t"
  3514. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  // V = (high word - low word) >> 8 for both blocks.
  3515. "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
  3516. "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
  3517. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  3518. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  // Group 1: pixels 4-7, same sequence, results in dest1_u / dest1_v.
  3519. "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
  3520. "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
  3521. "gsldrc1 %[src1], 0x08(%[next_argb4444]) \n\t"
  3522. "gsldlc1 %[src1], 0x0f(%[next_argb4444]) \n\t"
  3523. "psrlh %[dest1_u], %[src0], %[eight] \n\t"
  3524. "and %[b0], %[src0], %[c0] \n\t"
  3525. "and %[src0], %[src0], %[c1] \n\t"
  3526. "psrlh %[g0], %[src0], %[four] \n\t"
  3527. "and %[r0], %[dest1_u], %[c0] \n\t"
  3528. "psrlh %[src0], %[src1], %[eight] \n\t"
  3529. "and %[dest1_u], %[src1], %[c0] \n\t"
  3530. "and %[src1], %[src1], %[c1] \n\t"
  3531. "psrlh %[dest1_v], %[src1], %[four] \n\t"
  3532. "and %[src0], %[src0], %[c0] \n\t"
  3533. "paddh %[b0], %[b0], %[dest1_u] \n\t"
  3534. "paddh %[g0], %[g0], %[dest1_v] \n\t"
  3535. "paddh %[r0], %[r0], %[src0] \n\t"
  3536. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3537. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3538. "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
  3539. "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
  3540. "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
  3541. "psrlh %[b0], %[src0], %[four] \n\t"
  3542. "psllh %[r0], %[src0], %[two] \n\t"
  3543. "or %[b0], %[b0], %[r0] \n\t"
  3544. "psrlh %[r0], %[g0], %[four] \n\t"
  3545. "psllh %[g0], %[g0], %[two] \n\t"
  3546. "or %[g0], %[g0], %[r0] \n\t"
  3547. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3548. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3549. "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
  3550. "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
  3551. "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
  3552. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3553. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3554. "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
  3555. "pshufh %[dest1_u], %[src0], %[mask] \n\t"
  3556. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  3557. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3558. "pshufh %[b0], %[src1], %[mask] \n\t"
  3559. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3560. "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
  3561. "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
  3562. "psubw %[dest1_u], %[src0], %[src1] \n\t"
  3563. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  3564. "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
  3565. "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
  3566. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  3567. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  // Group 2: pixels 8-11, results in dest2_u / dest2_v.
  3568. "gsldrc1 %[src0], 0x10(%[src_argb4444]) \n\t"
  3569. "gsldlc1 %[src0], 0x17(%[src_argb4444]) \n\t"
  3570. "gsldrc1 %[src1], 0x10(%[next_argb4444]) \n\t"
  3571. "gsldlc1 %[src1], 0x17(%[next_argb4444]) \n\t"
  3572. "psrlh %[dest2_u], %[src0], %[eight] \n\t"
  3573. "and %[b0], %[src0], %[c0] \n\t"
  3574. "and %[src0], %[src0], %[c1] \n\t"
  3575. "psrlh %[g0], %[src0], %[four] \n\t"
  3576. "and %[r0], %[dest2_u], %[c0] \n\t"
  3577. "psrlh %[src0], %[src1], %[eight] \n\t"
  3578. "and %[dest2_u], %[src1], %[c0] \n\t"
  3579. "and %[src1], %[src1], %[c1] \n\t"
  3580. "psrlh %[dest2_v], %[src1], %[four] \n\t"
  3581. "and %[src0], %[src0], %[c0] \n\t"
  3582. "paddh %[b0], %[b0], %[dest2_u] \n\t"
  3583. "paddh %[g0], %[g0], %[dest2_v] \n\t"
  3584. "paddh %[r0], %[r0], %[src0] \n\t"
  3585. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3586. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3587. "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
  3588. "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
  3589. "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
  3590. "psrlh %[b0], %[src0], %[four] \n\t"
  3591. "psllh %[r0], %[src0], %[two] \n\t"
  3592. "or %[b0], %[b0], %[r0] \n\t"
  3593. "psrlh %[r0], %[g0], %[four] \n\t"
  3594. "psllh %[g0], %[g0], %[two] \n\t"
  3595. "or %[g0], %[g0], %[r0] \n\t"
  3596. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3597. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3598. "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
  3599. "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
  3600. "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
  3601. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3602. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3603. "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
  3604. "pshufh %[dest2_u], %[src0], %[mask] \n\t"
  3605. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  3606. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3607. "pshufh %[b0], %[src1], %[mask] \n\t"
  3608. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3609. "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
  3610. "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
  3611. "psubw %[dest2_u], %[src0], %[src1] \n\t"
  3612. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  3613. "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
  3614. "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
  3615. "psubw %[dest2_v], %[src1], %[src0] \n\t"
  3616. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  // Group 3: pixels 12-15, results in dest3_u / dest3_v.
  3617. "gsldrc1 %[src0], 0x18(%[src_argb4444]) \n\t"
  3618. "gsldlc1 %[src0], 0x1f(%[src_argb4444]) \n\t"
  3619. "gsldrc1 %[src1], 0x18(%[next_argb4444]) \n\t"
  3620. "gsldlc1 %[src1], 0x1f(%[next_argb4444]) \n\t"
  3621. "psrlh %[dest3_u], %[src0], %[eight] \n\t"
  3622. "and %[b0], %[src0], %[c0] \n\t"
  3623. "and %[src0], %[src0], %[c1] \n\t"
  3624. "psrlh %[g0], %[src0], %[four] \n\t"
  3625. "and %[r0], %[dest3_u], %[c0] \n\t"
  3626. "psrlh %[src0], %[src1], %[eight] \n\t"
  3627. "and %[dest3_u], %[src1], %[c0] \n\t"
  3628. "and %[src1], %[src1], %[c1] \n\t"
  3629. "psrlh %[dest3_v], %[src1], %[four] \n\t"
  3630. "and %[src0], %[src0], %[c0] \n\t"
  3631. "paddh %[b0], %[b0], %[dest3_u] \n\t"
  3632. "paddh %[g0], %[g0], %[dest3_v] \n\t"
  3633. "paddh %[r0], %[r0], %[src0] \n\t"
  3634. "punpcklhw %[src0], %[b0], %[r0] \n\t"
  3635. "punpckhhw %[src1], %[b0], %[r0] \n\t"
  3636. "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
  3637. "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
  3638. "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
  3639. "psrlh %[b0], %[src0], %[four] \n\t"
  3640. "psllh %[r0], %[src0], %[two] \n\t"
  3641. "or %[b0], %[b0], %[r0] \n\t"
  3642. "psrlh %[r0], %[g0], %[four] \n\t"
  3643. "psllh %[g0], %[g0], %[two] \n\t"
  3644. "or %[g0], %[g0], %[r0] \n\t"
  3645. "punpcklhw %[src0], %[g0], %[value] \n\t"
  3646. "punpckhhw %[src1], %[g0], %[value] \n\t"
  3647. "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
  3648. "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
  3649. "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
  3650. "punpcklhw %[src0], %[b0], %[g0] \n\t"
  3651. "punpckhhw %[src1], %[b0], %[g0] \n\t"
  3652. "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
  3653. "pshufh %[dest3_u], %[src0], %[mask] \n\t"
  3654. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  3655. "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
  3656. "pshufh %[b0], %[src1], %[mask] \n\t"
  3657. "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
  3658. "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
  3659. "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
  3660. "psubw %[dest3_u], %[src0], %[src1] \n\t"
  3661. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  3662. "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
  3663. "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
  3664. "psubw %[dest3_v], %[src1], %[src0] \n\t"
  3665. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  // Narrow the eight 32-bit U results to bytes and store them (unaligned).
  3666. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  3667. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  3668. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  3669. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  3670. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  // Same for the eight V results.
  3671. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  3672. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  3673. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  3674. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  3675. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  // Advance: 32 source bytes per row, 8 output bytes per plane, 16 pixels.
  3676. "daddiu %[src_argb4444], %[src_argb4444], 0x20 \n\t"
  3677. "daddiu %[next_argb4444], %[next_argb4444], 0x20 \n\t"
  3678. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  3679. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  3680. "daddiu %[width], %[width], -0x10 \n\t"
  3681. "bgtz %[width], 1b \n\t"
  3682. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
  3683. [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
  3684. [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
  3685. [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
  3686. [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
  3687. [dest3_v] "=&f"(ftmp[12])
  // NOTE(review): the pointer, stride and width operands below are declared
  // as inputs ("r") but the asm body writes to them (daddu/daddiu). GCC's
  // extended-asm rules expect modified operands to be outputs ("+r") -
  // confirm whether this should be changed.
  3688. : [src_argb4444] "r"(src_argb4444),
  3689. [next_argb4444] "r"(src_stride_argb4444), [dst_u] "r"(dst_u),
  3690. [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
  3691. [c0] "f"(c0), [c1] "f"(c1), [mask] "f"(mask), [mask_u] "f"(mask_u),
  3692. [mask_v] "f"(mask_v), [eight] "f"(0x08), [four] "f"(0x04),
  3693. [two] "f"(0x02)
  3694. : "memory");
  3695. }
  // Converts a row of 4-byte pixels to full-resolution chroma: one U byte and
  // one V byte are produced for every source pixel (no subsampling).
  //   src_argb     - source row, 4 bytes per pixel.
  //   dst_u, dst_v - output rows; 8 bytes are stored to each per pass.
  //   width        - pixel count; reduced by 8 per pass, and the loop repeats
  //                  while the remainder is > 0.
  // The loop body always runs at least once and always handles 8 pixels, so
  // callers presumably pass a positive multiple of 8 - TODO confirm.
  //
  // With p0..p3 being the pixel's bytes in memory order (assuming a
  // little-endian load), each pixel yields:
  //   U = (0x8080 + 112 * p0 - 74 * p1 - 38 * p2) >> 8
  //   V = (0x8080 + 112 * p2 - 94 * p1 - 18 * p0) >> 8
  // p3 does not contribute. Results are narrowed with signed (packsswh) then
  // unsigned (packushb) saturation.
  3696. void ARGBToUV444Row_MMI(const uint8_t* src_argb,
  3697. uint8_t* dst_u,
  3698. uint8_t* dst_v,
  3699. int width) {
  3700. uint64_t ftmp[12];
  // Bias halfword; multiplied by the weight 2 in mask_u / mask_v it gives
  // the 0x8080 term.
  3701. const uint64_t value = 0x4040;
  // Halfword weights, low to high: {2, 112, 74, 38} for U.
  3702. const uint64_t mask_u = 0x0026004a00700002;
  // Halfword weights, low to high: {18, 94, 112, 2} for V.
  3703. const uint64_t mask_v = 0x00020070005e0012;
  3704. __asm__ volatile(
  3705. "1: \n\t"
  // Group 0: pixels 0-1 -> dest0_u / dest0_v (two 32-bit lanes each).
  3706. "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
  3707. "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
  // Zero-extend each byte to a halfword: src_lo = pixel 0, src_hi = pixel 1.
  3708. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  3709. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  // U operand: shift the pixel up one halfword and put the bias in lane 0.
  3710. "dsll %[dest0_u], %[src_lo], %[sixteen] \n\t"
  3711. "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
  // V operand: overwrite lane 3 (p3) with the bias.
  3712. "pinsrh_3 %[dest0_v], %[src_lo], %[value] \n\t"
  3713. "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
  3714. "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
  // Same for pixel 1, reusing src_lo (U terms) and src_hi (V terms).
  3715. "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
  3716. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  3717. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  3718. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  3719. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  // U = (low word - high word) >> 8 for both pixels.
  3720. "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
  3721. "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
  3722. "psubw %[dest0_u], %[src0], %[src1] \n\t"
  3723. "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
  // V = (high word - low word) >> 8 for both pixels.
  3724. "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
  3725. "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
  3726. "psubw %[dest0_v], %[src1], %[src0] \n\t"
  3727. "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
  // Group 1: pixels 2-3 -> dest1_u / dest1_v.
  3728. "gsldrc1 %[src0], 0x08(%[src_argb]) \n\t"
  3729. "gsldlc1 %[src0], 0x0f(%[src_argb]) \n\t"
  3730. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  3731. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  3732. "dsll %[dest1_u], %[src_lo], %[sixteen] \n\t"
  3733. "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
  3734. "pinsrh_3 %[dest1_v], %[src_lo], %[value] \n\t"
  3735. "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
  3736. "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
  3737. "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
  3738. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  3739. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  3740. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  3741. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  3742. "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
  3743. "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
  3744. "psubw %[dest1_u], %[src0], %[src1] \n\t"
  3745. "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
  3746. "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
  3747. "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
  3748. "psubw %[dest1_v], %[src1], %[src0] \n\t"
  3749. "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
  // Group 2: pixels 4-5 -> dest2_u / dest2_v.
  3750. "gsldrc1 %[src0], 0x10(%[src_argb]) \n\t"
  3751. "gsldlc1 %[src0], 0x17(%[src_argb]) \n\t"
  3752. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  3753. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  3754. "dsll %[dest2_u], %[src_lo], %[sixteen] \n\t"
  3755. "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
  3756. "pinsrh_3 %[dest2_v], %[src_lo], %[value] \n\t"
  3757. "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
  3758. "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
  3759. "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
  3760. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  3761. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  3762. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  3763. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  3764. "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
  3765. "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
  3766. "psubw %[dest2_u], %[src0], %[src1] \n\t"
  3767. "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
  3768. "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
  3769. "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
  3770. "psubw %[dest2_v], %[src1], %[src0] \n\t"
  3771. "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
  // Group 3: pixels 6-7 -> dest3_u / dest3_v.
  3772. "gsldrc1 %[src0], 0x18(%[src_argb]) \n\t"
  3773. "gsldlc1 %[src0], 0x1f(%[src_argb]) \n\t"
  3774. "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
  3775. "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
  3776. "dsll %[dest3_u], %[src_lo], %[sixteen] \n\t"
  3777. "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
  3778. "pinsrh_3 %[dest3_v], %[src_lo], %[value] \n\t"
  3779. "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
  3780. "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
  3781. "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
  3782. "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
  3783. "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
  3784. "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
  3785. "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
  3786. "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
  3787. "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
  3788. "psubw %[dest3_u], %[src0], %[src1] \n\t"
  3789. "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
  3790. "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
  3791. "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
  3792. "psubw %[dest3_v], %[src1], %[src0] \n\t"
  3793. "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
  // Narrow the eight 32-bit U results to bytes and store them (unaligned).
  3794. "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
  3795. "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
  3796. "packushb %[dest0_u], %[src0], %[src1] \n\t"
  3797. "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
  3798. "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
  // Same for the eight V results.
  3799. "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
  3800. "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
  3801. "packushb %[dest0_v], %[src0], %[src1] \n\t"
  3802. "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
  3803. "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
  // Advance: 32 source bytes, 8 output bytes per plane, 8 pixels.
  3804. "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
  3805. "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
  3806. "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
  3807. "daddi %[width], %[width], -0x08 \n\t"
  3808. "bgtz %[width], 1b \n\t"
  3809. : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
  3810. [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]),
  3811. [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]),
  3812. [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]),
  3813. [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]),
  3814. [dest3_v] "=&f"(ftmp[11])
  // NOTE(review): src_argb, dst_u, dst_v and width are declared as inputs
  // ("r") but the asm body writes to them. GCC's extended-asm rules expect
  // modified operands to be outputs ("+r") - confirm whether this should
  // be changed.
  3815. : [src_argb] "r"(src_argb), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
  3816. [width] "r"(width), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
  3817. [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10),
  3818. [eight] "f"(0x08)
  3819. : "memory");
  3820. }
  // Converts a row of 4-byte pixels to gray, two pixels (8 bytes) per pass.
  //   src_argb - source row, 4 bytes per pixel.
  //   dst_argb - destination row, 4 bytes per pixel.
  //   width    - pixel count; reduced by 2 per pass.
  // With p0..p3 being the pixel's bytes in memory order (assuming a
  // little-endian load):
  //   gray = (29 * p0 + 150 * p1 + 77 * p2 + 128) >> 8
  // gray is written to bytes 0-2 of the output pixel, and byte 3 is copied
  // unchanged from the source.
  // NOTE(review): the loop exits only when width reaches exactly 0 (bnez), so
  // width must be a positive multiple of 2 - confirm callers guarantee this.
  3821. void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  3822. uint64_t src, src_lo, src_hi, src37, dest, dest_lo, dest_hi;
  3823. uint64_t tmp0, tmp1;
  // Zero register used to widen bytes to halfwords.
  3824. const uint64_t mask0 = 0x0;
  // Inserted into lane 3 so the 0x0080 weight in mask2 adds the +128 term.
  3825. const uint64_t mask1 = 0x01;
  // Halfword weights, low to high: {29, 150, 77, 128}.
  3826. const uint64_t mask2 = 0x0080004D0096001DULL;
  // mask3 selects byte 3 of each pixel; mask4 selects bytes 0-2.
  3827. const uint64_t mask3 = 0xFF000000FF000000ULL;
  3828. const uint64_t mask4 = ~mask3;
  3829. const uint64_t shift = 0x08;
  3830. __asm__ volatile(
  3831. "1: \n\t"
  // Unaligned 8-byte load of two pixels; save their byte 3 in src37.
  3832. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  3833. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  3834. "and %[src37], %[src], %[mask3] \n\t"
  // First pixel: widen, replace lane 3 with 1, weighted sum, >> 8, then
  // replicate the gray value into all four halfwords.
  3835. "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
  3836. "pinsrh_3 %[src_lo], %[src_lo], %[mask1] \n\t"
  3837. "pmaddhw %[dest_lo], %[src_lo], %[mask2] \n\t"
  3838. "punpcklwd %[tmp0], %[dest_lo], %[dest_lo] \n\t"
  3839. "punpckhwd %[tmp1], %[dest_lo], %[dest_lo] \n\t"
  3840. "paddw %[dest_lo], %[tmp0], %[tmp1] \n\t"
  3841. "psrlw %[dest_lo], %[dest_lo], %[shift] \n\t"
  3842. "packsswh %[dest_lo], %[dest_lo], %[dest_lo] \n\t"
  // Second pixel: same sequence on the high four bytes.
  3843. "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
  3844. "pinsrh_3 %[src_hi], %[src_hi], %[mask1] \n\t"
  3845. "pmaddhw %[dest_hi], %[src_hi], %[mask2] \n\t"
  3846. "punpcklwd %[tmp0], %[dest_hi], %[dest_hi] \n\t"
  3847. "punpckhwd %[tmp1], %[dest_hi], %[dest_hi] \n\t"
  3848. "paddw %[dest_hi], %[tmp0], %[tmp1] \n\t"
  3849. "psrlw %[dest_hi], %[dest_hi], %[shift] \n\t"
  3850. "packsswh %[dest_hi], %[dest_hi], %[dest_hi] \n\t"
  // Pack to bytes, clear byte 3 of each pixel and restore the saved one.
  3851. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  3852. "and %[dest], %[dest], %[mask4] \n\t"
  3853. "or %[dest], %[dest], %[src37] \n\t"
  3854. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  3855. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  3856. "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
  3857. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  3858. "daddi %[width], %[width], -0x02 \n\t"
  3859. "bnez %[width], 1b \n\t"
  3860. : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
  3861. [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0),
  3862. [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest),
  3863. [src37] "=&f"(src37)
  // NOTE(review): src_ptr, dst_ptr and width are declared as inputs ("r")
  // but the asm body writes to them. GCC's extended-asm rules expect
  // modified operands to be outputs ("+r") - confirm whether this should
  // be changed.
  3864. : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
  3865. [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1),
  3866. [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4)
  3867. : "memory");
  3868. }
  3869. // Convert a row of image to Sepia tone.
  // Works in place on dst_argb, two 4-byte pixels (8 bytes) per pass.
  //   dst_argb - row that is both read and overwritten.
  //   width    - pixel count; reduced by 2 per pass.
  // With p0..p3 being the pixel's bytes in memory order (assuming a
  // little-endian load), the new bytes are:
  //   p0' = (17 * p0 + 68 * p1 + 35 * p2) >> 7
  //   p1' = (22 * p0 + 88 * p1 + 45 * p2) >> 7
  //   p2' = (24 * p0 + 98 * p1 + 50 * p2) >> 7
  // each saturated to 0..255 by packushb; p3 is kept unchanged.
  // NOTE(review): the loop exits only when width reaches exactly 0 (bnez), so
  // width must be a positive multiple of 2 - confirm callers guarantee this.
  3870. void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) {
  3871. uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2;
  3872. uint64_t tmp0, tmp1;
  // Zero register used for byte widening and as a zero word source.
  3873. const uint64_t mask0 = 0x0;
  // Halfword weights, low to high (lane 3 is 0 so p3 does not contribute):
  // mask1 = {17, 68, 35, 0}, mask2 = {22, 88, 45, 0}, mask3 = {24, 98, 50, 0}.
  3874. const uint64_t mask1 = 0x002300440011ULL;
  3875. const uint64_t mask2 = 0x002D00580016ULL;
  3876. const uint64_t mask3 = 0x003200620018ULL;
  // Selects byte 3 of each of the two pixels.
  3877. const uint64_t mask4 = 0xFF000000FF000000ULL;
  3878. const uint64_t shift = 0x07;
  3879. __asm__ volatile(
  3880. "1: \n\t"
  // Unaligned 8-byte load of two pixels; save their byte 3 in dest37.
  3881. "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  3882. "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  3883. "and %[dest37], %[dest], %[mask4] \n\t"
  // First pixel: three weighted sums, one per output byte.
  3884. "punpcklbh %[dest_lo], %[dest], %[mask0] \n\t"
  3885. "pmaddhw %[dest0], %[dest_lo], %[mask1] \n\t"
  3886. "pmaddhw %[dest1], %[dest_lo], %[mask2] \n\t"
  3887. "pmaddhw %[dest2], %[dest_lo], %[mask3] \n\t"
  // dest0 = {p0', p1'} as 32-bit words after the shift.
  3888. "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
  3889. "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
  3890. "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
  3891. "psrlw %[dest0], %[dest0], %[shift] \n\t"
  // dest1 = {p2', 0}; the zero becomes byte 3, which is filled in later.
  3892. "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
  3893. "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
  3894. "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
  3895. "psrlw %[dest1], %[dest1], %[shift] \n\t"
  3896. "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
  // Second pixel: same sequence on the high four bytes.
  3897. "punpckhbh %[dest_hi], %[dest], %[mask0] \n\t"
  3898. "pmaddhw %[dest0], %[dest_hi], %[mask1] \n\t"
  3899. "pmaddhw %[dest1], %[dest_hi], %[mask2] \n\t"
  3900. "pmaddhw %[dest2], %[dest_hi], %[mask3] \n\t"
  3901. "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
  3902. "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
  3903. "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
  3904. "psrlw %[dest0], %[dest0], %[shift] \n\t"
  3905. "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
  3906. "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
  3907. "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
  3908. "psrlw %[dest1], %[dest1], %[shift] \n\t"
  3909. "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
  // Pack both pixels to bytes, restore the saved byte 3, store in place.
  3910. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  3911. "or %[dest], %[dest], %[dest37] \n\t"
  3912. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  3913. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  3914. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  3915. "daddi %[width], %[width], -0x02 \n\t"
  3916. "bnez %[width], 1b \n\t"
  3917. : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
  3918. [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
  3919. [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1),
  3920. [dest] "=&f"(dest)
  // NOTE(review): dst_ptr and width are declared as inputs ("r") but the asm
  // body writes to them. GCC's extended-asm rules expect modified operands
  // to be outputs ("+r") - confirm whether this should be changed.
  3921. : [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask0] "f"(mask0),
  3922. [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
  3923. [mask4] "f"(mask4), [shift] "f"(shift)
  3924. : "memory");
  3925. }
  3926. // Apply color matrix to a row of image. Matrix is signed.
  3927. // TODO(fbarchard): Consider adding rounding (+32).
  3928. void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
  3929. uint8_t* dst_argb,
  3930. const int8_t* matrix_argb,
  3931. int width) {
  3932. uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2,
  3933. dest3;
  3934. uint64_t matrix, matrix_hi, matrix_lo;
  3935. uint64_t tmp0, tmp1;
  3936. const uint64_t shift0 = 0x06;
  3937. const uint64_t shift1 = 0x08;
  3938. const uint64_t mask0 = 0x0;
  3939. const uint64_t mask1 = 0x08;
  3940. __asm__ volatile(
  3941. "1: \n\t"
  3942. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  3943. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  3944. "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
  3945. "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
  3946. "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
  3947. "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
  3948. "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
  3949. "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
  3950. "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
  3951. "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
  3952. "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
  3953. "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
  3954. "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
  3955. "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
  3956. "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
  3957. "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
  3958. "psraw %[dest0], %[dest0], %[shift0] \n\t"
  3959. "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
  3960. "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
  3961. "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
  3962. "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
  3963. "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
  3964. "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
  3965. "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
  3966. "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
  3967. "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
  3968. "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
  3969. "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
  3970. "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
  3971. "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
  3972. "psraw %[dest1], %[dest1], %[shift0] \n\t"
  3973. "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
  3974. "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
  3975. "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
  3976. "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
  3977. "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
  3978. "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
  3979. "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
  3980. "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
  3981. "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
  3982. "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
  3983. "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
  3984. "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
  3985. "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
  3986. "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
  3987. "psraw %[dest2], %[dest2], %[shift0] \n\t"
  3988. "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
  3989. "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
  3990. "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
  3991. "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
  3992. "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
  3993. "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
  3994. "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
  3995. "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
  3996. "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
  3997. "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
  3998. "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
  3999. "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
  4000. "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
  4001. "psraw %[dest3], %[dest3], %[shift0] \n\t"
  4002. "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
  4003. "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
  4004. "packushb %[dest], %[tmp0], %[tmp1] \n\t"
  4005. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  4006. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4007. "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
  4008. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  4009. "daddi %[width], %[width], -0x02 \n\t"
  4010. "bnez %[width], 1b \n\t"
  4011. : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
  4012. [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
  4013. [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
  4014. [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest),
  4015. [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi),
  4016. [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix)
  4017. : [src_ptr] "r"(src_argb), [matrix_ptr] "r"(matrix_argb),
  4018. [dst_ptr] "r"(dst_argb), [width] "r"(width), [shift0] "f"(shift0),
  4019. [shift1] "f"(shift1), [mask0] "f"(mask0), [mask1] "f"(mask1)
  4020. : "memory");
  4021. }
  4022. void ARGBShadeRow_MMI(const uint8_t* src_argb,
  4023. uint8_t* dst_argb,
  4024. int width,
  4025. uint32_t value) {
  4026. uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi;
  4027. const uint64_t shift = 0x08;
  4028. __asm__ volatile(
  4029. "1: \n\t"
  4030. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  4031. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  4032. "punpcklbh %[src_lo], %[src], %[src] \n\t"
  4033. "punpckhbh %[src_hi], %[src], %[src] \n\t"
  4034. "punpcklbh %[value], %[value], %[value] \n\t"
  4035. "pmulhuh %[dest_lo], %[src_lo], %[value] \n\t"
  4036. "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
  4037. "pmulhuh %[dest_hi], %[src_hi], %[value] \n\t"
  4038. "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
  4039. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  4040. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  4041. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4042. "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
  4043. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  4044. "daddi %[width], %[width], -0x02 \n\t"
  4045. "bnez %[width], 1b \n\t"
  4046. : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
  4047. [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src),
  4048. [dest] "=&f"(dest)
  4049. : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
  4050. [value] "f"(value), [shift] "f"(shift)
  4051. : "memory");
  4052. }
  4053. void ARGBMultiplyRow_MMI(const uint8_t* src_argb0,
  4054. const uint8_t* src_argb1,
  4055. uint8_t* dst_argb,
  4056. int width) {
  4057. uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo;
  4058. uint64_t dest, dest_lo, dest_hi;
  4059. const uint64_t mask = 0x0;
  4060. __asm__ volatile(
  4061. "1: \n\t"
  4062. "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
  4063. "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
  4064. "punpcklbh %[src0_lo], %[src0], %[src0] \n\t"
  4065. "punpckhbh %[src0_hi], %[src0], %[src0] \n\t"
  4066. "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
  4067. "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
  4068. "punpcklbh %[src1_lo], %[src1], %[mask] \n\t"
  4069. "punpckhbh %[src1_hi], %[src1], %[mask] \n\t"
  4070. "pmulhuh %[dest_lo], %[src0_lo], %[src1_lo] \n\t"
  4071. "pmulhuh %[dest_hi], %[src0_hi], %[src1_hi] \n\t"
  4072. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  4073. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  4074. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4075. "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
  4076. "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
  4077. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  4078. "daddi %[width], %[width], -0x02 \n\t"
  4079. "bnez %[width], 1b \n\t"
  4080. : [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
  4081. [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
  4082. [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
  4083. [src1] "=&f"(src1), [dest] "=&f"(dest)
  4084. : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
  4085. [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask] "f"(mask)
  4086. : "memory");
  4087. }
  4088. void ARGBAddRow_MMI(const uint8_t* src_argb0,
  4089. const uint8_t* src_argb1,
  4090. uint8_t* dst_argb,
  4091. int width) {
  4092. uint64_t src0, src1, dest;
  4093. __asm__ volatile(
  4094. "1: \n\t"
  4095. "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
  4096. "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
  4097. "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
  4098. "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
  4099. "paddusb %[dest], %[src0], %[src1] \n\t"
  4100. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  4101. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4102. "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
  4103. "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
  4104. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  4105. "daddi %[width], %[width], -0x02 \n\t"
  4106. "bnez %[width], 1b \n\t"
  4107. : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
  4108. : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
  4109. [dst_ptr] "r"(dst_argb), [width] "r"(width)
  4110. : "memory");
  4111. }
  4112. void ARGBSubtractRow_MMI(const uint8_t* src_argb0,
  4113. const uint8_t* src_argb1,
  4114. uint8_t* dst_argb,
  4115. int width) {
  4116. uint64_t src0, src1, dest;
  4117. __asm__ volatile(
  4118. "1: \n\t"
  4119. "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
  4120. "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
  4121. "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
  4122. "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
  4123. "psubusb %[dest], %[src0], %[src1] \n\t"
  4124. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  4125. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4126. "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
  4127. "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
  4128. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  4129. "daddi %[width], %[width], -0x02 \n\t"
  4130. "bnez %[width], 1b \n\t"
  4131. : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
  4132. : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
  4133. [dst_ptr] "r"(dst_argb), [width] "r"(width)
  4134. : "memory");
  4135. }
  4136. // Sobel functions which mimics SSSE3.
  4137. void SobelXRow_MMI(const uint8_t* src_y0,
  4138. const uint8_t* src_y1,
  4139. const uint8_t* src_y2,
  4140. uint8_t* dst_sobelx,
  4141. int width) {
  4142. uint64_t y00 = 0, y10 = 0, y20 = 0;
  4143. uint64_t y02 = 0, y12 = 0, y22 = 0;
  4144. uint64_t zero = 0x0;
  4145. uint64_t sobel = 0x0;
  4146. __asm__ volatile(
  4147. "1: \n\t"
  4148. "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
  4149. "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
  4150. "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // a_sub=src_y0[i+2]
  4151. "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
  4152. "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // b=src_y1[i]
  4153. "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
  4154. "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // b_sub=src_y1[i+2]
  4155. "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
  4156. "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t" // c=src_y2[i]
  4157. "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t"
  4158. "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t" // c_sub=src_y2[i+2]
  4159. "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t"
  4160. "punpcklbh %[y00], %[y00], %[zero] \n\t"
  4161. "punpcklbh %[y10], %[y10], %[zero] \n\t"
  4162. "punpcklbh %[y20], %[y20], %[zero] \n\t"
  4163. "punpcklbh %[y02], %[y02], %[zero] \n\t"
  4164. "punpcklbh %[y12], %[y12], %[zero] \n\t"
  4165. "punpcklbh %[y22], %[y22], %[zero] \n\t"
  4166. "paddh %[y00], %[y00], %[y10] \n\t" // a+b
  4167. "paddh %[y20], %[y20], %[y10] \n\t" // c+b
  4168. "paddh %[y00], %[y00], %[y20] \n\t" // a+2b+c
  4169. "paddh %[y02], %[y02], %[y12] \n\t" // a_sub+b_sub
  4170. "paddh %[y22], %[y22], %[y12] \n\t" // c_sub+b_sub
  4171. "paddh %[y02], %[y02], %[y22] \n\t" // a_sub+2b_sub+c_sub
  4172. "pmaxsh %[y10], %[y00], %[y02] \n\t"
  4173. "pminsh %[y20], %[y00], %[y02] \n\t"
  4174. "psubh %[sobel], %[y10], %[y20] \n\t" // Abs
  4175. "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
  4176. "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
  4177. "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
  4178. "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
  4179. "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
  4180. "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
  4181. "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
  4182. "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
  4183. "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t"
  4184. "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t"
  4185. "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t"
  4186. "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t"
  4187. "punpcklbh %[y00], %[y00], %[zero] \n\t"
  4188. "punpcklbh %[y10], %[y10], %[zero] \n\t"
  4189. "punpcklbh %[y20], %[y20], %[zero] \n\t"
  4190. "punpcklbh %[y02], %[y02], %[zero] \n\t"
  4191. "punpcklbh %[y12], %[y12], %[zero] \n\t"
  4192. "punpcklbh %[y22], %[y22], %[zero] \n\t"
  4193. "paddh %[y00], %[y00], %[y10] \n\t"
  4194. "paddh %[y20], %[y20], %[y10] \n\t"
  4195. "paddh %[y00], %[y00], %[y20] \n\t"
  4196. "paddh %[y02], %[y02], %[y12] \n\t"
  4197. "paddh %[y22], %[y22], %[y12] \n\t"
  4198. "paddh %[y02], %[y02], %[y22] \n\t"
  4199. "pmaxsh %[y10], %[y00], %[y02] \n\t"
  4200. "pminsh %[y20], %[y00], %[y02] \n\t"
  4201. "psubh %[y00], %[y10], %[y20] \n\t"
  4202. "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
  4203. "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
  4204. "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"
  4205. "daddiu %[src_y0], %[src_y0], 8 \n\t"
  4206. "daddiu %[src_y1], %[src_y1], 8 \n\t"
  4207. "daddiu %[src_y2], %[src_y2], 8 \n\t"
  4208. "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t"
  4209. "daddiu %[width], %[width], -8 \n\t"
  4210. "bgtz %[width], 1b \n\t"
  4211. "nop \n\t"
  4212. : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10),
  4213. [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22)
  4214. : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1), [src_y2] "r"(src_y2),
  4215. [dst_sobelx] "r"(dst_sobelx), [width] "r"(width), [zero] "f"(zero)
  4216. : "memory");
  4217. }
  4218. void SobelYRow_MMI(const uint8_t* src_y0,
  4219. const uint8_t* src_y1,
  4220. uint8_t* dst_sobely,
  4221. int width) {
  4222. uint64_t y00 = 0, y01 = 0, y02 = 0;
  4223. uint64_t y10 = 0, y11 = 0, y12 = 0;
  4224. uint64_t zero = 0x0;
  4225. uint64_t sobel = 0x0;
  4226. __asm__ volatile(
  4227. "1: \n\t"
  4228. "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
  4229. "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
  4230. "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t" // b=src_y0[i+1]
  4231. "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t"
  4232. "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // c=src_y0[i+2]
  4233. "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
  4234. "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // a_sub=src_y1[i]
  4235. "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
  4236. "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t" // b_sub=src_y1[i+1]
  4237. "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t"
  4238. "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // c_sub=src_y1[i+2]
  4239. "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
  4240. "punpcklbh %[y00], %[y00], %[zero] \n\t"
  4241. "punpcklbh %[y01], %[y01], %[zero] \n\t"
  4242. "punpcklbh %[y02], %[y02], %[zero] \n\t"
  4243. "punpcklbh %[y10], %[y10], %[zero] \n\t"
  4244. "punpcklbh %[y11], %[y11], %[zero] \n\t"
  4245. "punpcklbh %[y12], %[y12], %[zero] \n\t"
  4246. "paddh %[y00], %[y00], %[y01] \n\t" // a+b
  4247. "paddh %[y02], %[y02], %[y01] \n\t" // c+b
  4248. "paddh %[y00], %[y00], %[y02] \n\t" // a+2b+c
  4249. "paddh %[y10], %[y10], %[y11] \n\t" // a_sub+b_sub
  4250. "paddh %[y12], %[y12], %[y11] \n\t" // c_sub+b_sub
  4251. "paddh %[y10], %[y10], %[y12] \n\t" // a_sub+2b_sub+c_sub
  4252. "pmaxsh %[y02], %[y00], %[y10] \n\t"
  4253. "pminsh %[y12], %[y00], %[y10] \n\t"
  4254. "psubh %[sobel], %[y02], %[y12] \n\t" // Abs
  4255. "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
  4256. "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
  4257. "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t"
  4258. "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t"
  4259. "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
  4260. "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
  4261. "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
  4262. "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
  4263. "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t"
  4264. "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t"
  4265. "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
  4266. "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
  4267. "punpcklbh %[y00], %[y00], %[zero] \n\t"
  4268. "punpcklbh %[y01], %[y01], %[zero] \n\t"
  4269. "punpcklbh %[y02], %[y02], %[zero] \n\t"
  4270. "punpcklbh %[y10], %[y10], %[zero] \n\t"
  4271. "punpcklbh %[y11], %[y11], %[zero] \n\t"
  4272. "punpcklbh %[y12], %[y12], %[zero] \n\t"
  4273. "paddh %[y00], %[y00], %[y01] \n\t"
  4274. "paddh %[y02], %[y02], %[y01] \n\t"
  4275. "paddh %[y00], %[y00], %[y02] \n\t"
  4276. "paddh %[y10], %[y10], %[y11] \n\t"
  4277. "paddh %[y12], %[y12], %[y11] \n\t"
  4278. "paddh %[y10], %[y10], %[y12] \n\t"
  4279. "pmaxsh %[y02], %[y00], %[y10] \n\t"
  4280. "pminsh %[y12], %[y00], %[y10] \n\t"
  4281. "psubh %[y00], %[y02], %[y12] \n\t"
  4282. "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
  4283. "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
  4284. "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"
  4285. "daddiu %[src_y0], %[src_y0], 8 \n\t"
  4286. "daddiu %[src_y1], %[src_y1], 8 \n\t"
  4287. "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t"
  4288. "daddiu %[width], %[width], -8 \n\t"
  4289. "bgtz %[width], 1b \n\t"
  4290. "nop \n\t"
  4291. : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01),
  4292. [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12)
  4293. : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1),
  4294. [dst_sobely] "r"(dst_sobely), [width] "r"(width), [zero] "f"(zero)
  4295. : "memory");
  4296. }
  4297. void SobelRow_MMI(const uint8_t* src_sobelx,
  4298. const uint8_t* src_sobely,
  4299. uint8_t* dst_argb,
  4300. int width) {
  4301. double temp[3];
  4302. uint64_t c1 = 0xff000000ff000000;
  4303. __asm__ volatile(
  4304. "1: \n\t"
  4305. "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t" // a=src_sobelx[i]
  4306. "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t"
  4307. "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
  4308. "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t"
  4309. // s7 s6 s5 s4 s3 s2 s1 s0 = a+b
  4310. "paddusb %[t2] , %[t0], %[t1] \n\t"
  4311. // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0
  4312. "punpcklbh %[t0], %[t2], %[t2] \n\t"
  4313. // s1 s1 s0 s0->s1 s2 s1 s1 s0 s0 s0 s0
  4314. "punpcklbh %[t1], %[t0], %[t0] \n\t"
  4315. "or %[t1], %[t1], %[c1] \n\t"
  4316. // 255 s1 s1 s1 s55 s0 s0 s0
  4317. "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
  4318. "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"
  4319. // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
  4320. "punpckhbh %[t1], %[t0], %[t0] \n\t"
  4321. "or %[t1], %[t1], %[c1] \n\t"
  4322. // 255 s3 s3 s3 255 s2 s2 s2
  4323. "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
  4324. "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"
  4325. // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
  4326. "punpckhbh %[t0], %[t2], %[t2] \n\t"
  4327. // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
  4328. "punpcklbh %[t1], %[t0], %[t0] \n\t"
  4329. "or %[t1], %[t1], %[c1] \n\t"
  4330. "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
  4331. "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"
  4332. // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
  4333. "punpckhbh %[t1], %[t0], %[t0] \n\t"
  4334. "or %[t1], %[t1], %[c1] \n\t"
  4335. "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
  4336. "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"
  4337. "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
  4338. "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
  4339. "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
  4340. "daddiu %[width], %[width], -8 \n\t"
  4341. "bgtz %[width], 1b \n\t"
  4342. "nop \n\t"
  4343. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
  4344. : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
  4345. [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
  4346. : "memory");
  4347. }
  4348. void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
  4349. const uint8_t* src_sobely,
  4350. uint8_t* dst_y,
  4351. int width) {
  4352. uint64_t tr = 0;
  4353. uint64_t tb = 0;
  4354. __asm__ volatile(
  4355. "1: \n\t"
  4356. "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
  4357. "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t" // r=src_sobelx[i]
  4358. "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
  4359. "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t" // b=src_sobely[i]
  4360. "paddusb %[tr], %[tr], %[tb] \n\t" // g
  4361. "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
  4362. "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"
  4363. "daddiu %[dst_y], %[dst_y], 8 \n\t"
  4364. "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
  4365. "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
  4366. "daddiu %[width], %[width], -8 \n\t"
  4367. "bgtz %[width], 1b \n\t"
  4368. "nop \n\t"
  4369. : [tr] "=&f"(tr), [tb] "=&f"(tb)
  4370. : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
  4371. [dst_y] "r"(dst_y), [width] "r"(width)
  4372. : "memory");
  4373. }
  4374. void SobelXYRow_MMI(const uint8_t* src_sobelx,
  4375. const uint8_t* src_sobely,
  4376. uint8_t* dst_argb,
  4377. int width) {
  4378. uint64_t temp[3];
  4379. uint64_t result = 0;
  4380. uint64_t gb = 0;
  4381. uint64_t cr = 0;
  4382. uint64_t c1 = 0xffffffffffffffff;
  4383. __asm__ volatile(
  4384. "1: \n\t"
  4385. "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t" // r=src_sobelx[i]
  4386. "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t"
  4387. "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
  4388. "gsldrc1 %[tb], 0x00(%[src_sobely]) \n\t"
  4389. "paddusb %[tg] , %[tr], %[tb] \n\t" // g
  4390. // g3 b3 g2 b2 g1 b1 g0 b0
  4391. "punpcklbh %[gb], %[tb], %[tg] \n\t"
  4392. // c3 r3 r2 r2 c1 r1 c0 r0
  4393. "punpcklbh %[cr], %[tr], %[c1] \n\t"
  4394. // c1 r1 g1 b1 c0 r0 g0 b0
  4395. "punpcklhw %[result], %[gb], %[cr] \n\t"
  4396. "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t"
  4397. "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t"
  4398. // c3 r3 g3 b3 c2 r2 g2 b2
  4399. "punpckhhw %[result], %[gb], %[cr] \n\t"
  4400. "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t"
  4401. "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t"
  4402. // g7 b7 g6 b6 g5 b5 g4 b4
  4403. "punpckhbh %[gb], %[tb], %[tg] \n\t"
  4404. // c7 r7 c6 r6 c5 r5 c4 r4
  4405. "punpckhbh %[cr], %[tr], %[c1] \n\t"
  4406. // c5 r5 g5 b5 c4 r4 g4 b4
  4407. "punpcklhw %[result], %[gb], %[cr] \n\t"
  4408. "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t"
  4409. "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t"
  4410. // c7 r7 g7 b7 c6 r6 g6 b6
  4411. "punpckhhw %[result], %[gb], %[cr] \n\t"
  4412. "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t"
  4413. "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t"
  4414. "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
  4415. "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
  4416. "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
  4417. "daddiu %[width], %[width], -8 \n\t"
  4418. "bgtz %[width], 1b \n\t"
  4419. "nop \n\t"
  4420. : [tr] "=&f"(temp[0]), [tb] "=&f"(temp[1]), [tg] "=&f"(temp[2]),
  4421. [gb] "=&f"(gb), [cr] "=&f"(cr), [result] "=&f"(result)
  4422. : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
  4423. [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
  4424. : "memory");
  4425. }
  4426. void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  4427. // Copy a Y to RGB.
  4428. uint64_t src, dest;
  4429. const uint64_t mask0 = 0x00ffffff00ffffffULL;
  4430. const uint64_t mask1 = ~mask0;
  4431. __asm__ volatile(
  4432. "1: \n\t"
  4433. "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
  4434. "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
  4435. "punpcklbh %[src], %[src], %[src] \n\t"
  4436. "punpcklhw %[dest], %[src], %[src] \n\t"
  4437. "and %[dest], %[dest], %[mask0] \n\t"
  4438. "or %[dest], %[dest], %[mask1] \n\t"
  4439. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4440. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  4441. "punpckhhw %[dest], %[src], %[src] \n\t"
  4442. "and %[dest], %[dest], %[mask0] \n\t"
  4443. "or %[dest], %[dest], %[mask1] \n\t"
  4444. "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
  4445. "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
  4446. "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
  4447. "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
  4448. "daddi %[width], %[width], -0x04 \n\t"
  4449. "bnez %[width], 1b \n\t"
  4450. : [src] "=&f"(src), [dest] "=&f"(dest)
  4451. : [src_ptr] "r"(src_y), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
  4452. [mask1] "f"(mask1), [width] "r"(width)
  4453. : "memory");
  4454. }
  4455. // TODO - respect YuvConstants
  4456. void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf,
  4457. const struct YuvConstants*, int width) {
  4458. uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
  4459. const uint64_t mask0 = 0x0;
  4460. const uint64_t mask1 = 0x55;
  4461. const uint64_t mask2 = 0xAA;
  4462. const uint64_t mask3 = 0xFF;
  4463. const uint64_t mask4 = 0x4A354A354A354A35ULL;
  4464. const uint64_t mask5 = 0x0488048804880488ULL;
  4465. const uint64_t shift0 = 0x08;
  4466. const uint64_t shift1 = 0x06;
  4467. __asm__ volatile(
  4468. "1: \n\t"
  4469. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  4470. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  4471. "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
  4472. "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
  4473. "pshufh %[src], %[src_lo], %[mask0] \n\t"
  4474. "psllh %[dest_lo], %[src], %[shift0] \n\t"
  4475. "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
  4476. "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
  4477. "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
  4478. "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
  4479. "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
  4480. "pshufh %[src], %[src_lo], %[mask1] \n\t"
  4481. "psllh %[dest_hi], %[src], %[shift0] \n\t"
  4482. "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
  4483. "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
  4484. "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
  4485. "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
  4486. "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
  4487. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  4488. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  4489. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4490. "pshufh %[src], %[src_lo], %[mask2] \n\t"
  4491. "psllh %[dest_lo], %[src], %[shift0] \n\t"
  4492. "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
  4493. "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
  4494. "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
  4495. "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
  4496. "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
  4497. "pshufh %[src], %[src_lo], %[mask3] \n\t"
  4498. "psllh %[dest_hi], %[src], %[shift0] \n\t"
  4499. "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
  4500. "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
  4501. "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
  4502. "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
  4503. "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
  4504. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  4505. "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
  4506. "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
  4507. "pshufh %[src], %[src_hi], %[mask0] \n\t"
  4508. "psllh %[dest_lo], %[src], %[shift0] \n\t"
  4509. "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
  4510. "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
  4511. "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
  4512. "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
  4513. "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
  4514. "pshufh %[src], %[src_hi], %[mask1] \n\t"
  4515. "psllh %[dest_hi], %[src], %[shift0] \n\t"
  4516. "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
  4517. "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
  4518. "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
  4519. "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
  4520. "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
  4521. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  4522. "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
  4523. "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
  4524. "pshufh %[src], %[src_hi], %[mask2] \n\t"
  4525. "psllh %[dest_lo], %[src], %[shift0] \n\t"
  4526. "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
  4527. "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
  4528. "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
  4529. "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
  4530. "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
  4531. "pshufh %[src], %[src_hi], %[mask3] \n\t"
  4532. "psllh %[dest_hi], %[src], %[shift0] \n\t"
  4533. "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
  4534. "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
  4535. "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
  4536. "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
  4537. "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
  4538. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  4539. "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
  4540. "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
  4541. "daddi %[src_ptr], %[src_ptr], 0x08 \n\t"
  4542. "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
  4543. "daddi %[width], %[width], -0x08 \n\t"
  4544. "bnez %[width], 1b \n\t"
  4545. : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
  4546. [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
  4547. [dest_lo] "=&f"(dest_lo)
  4548. : [src_ptr] "r"(src_y), [dst_ptr] "r"(rgb_buf), [mask0] "f"(mask0),
  4549. [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
  4550. [mask4] "f"(mask4), [mask5] "f"(mask5), [shift0] "f"(shift0),
  4551. [shift1] "f"(shift1), [width] "r"(width)
  4552. : "memory");
  4553. }
  4554. void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
  4555. uint64_t source, src0, src1, dest;
  4556. const uint64_t mask0 = 0x0;
  4557. const uint64_t mask1 = 0x1b;
  4558. src += width - 1;
  4559. __asm__ volatile(
  4560. "1: \n\t"
  4561. "gsldlc1 %[source], 0(%[src_ptr]) \n\t"
  4562. "gsldrc1 %[source], -7(%[src_ptr]) \n\t"
  4563. "punpcklbh %[src0], %[source], %[mask0] \n\t"
  4564. "pshufh %[src0], %[src0], %[mask1] \n\t"
  4565. "punpckhbh %[src1], %[source], %[mask0] \n\t"
  4566. "pshufh %[src1], %[src1], %[mask1] \n\t"
  4567. "packushb %[dest], %[src1], %[src0] \n\t"
  4568. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  4569. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4570. "daddi %[src_ptr], %[src_ptr], -0x08 \n\t"
  4571. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  4572. "daddi %[width], %[width], -0x08 \n\t"
  4573. "bnez %[width], 1b \n\t"
  4574. : [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0),
  4575. [src1] "=&f"(src1)
  4576. : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
  4577. [mask1] "f"(mask1), [width] "r"(width)
  4578. : "memory");
  4579. }
  // Mirrors a row of interleaved byte pairs while de-interleaving it: the low
  // byte of every 16-bit pair is written to dst_u and the high byte to dst_v,
  // both in reversed pair order.
  //   src_uv: interleaved input row; read backwards starting at its last pair.
  //   dst_u, dst_v: output planes, written forwards, 8 bytes per iteration.
  //   width: number of pairs. The counter drops by 8 per iteration and the
  //          loop exits only when it is exactly zero, so callers presumably
  //          pass a positive multiple of 8 - TODO confirm.
  // NOTE(review): src_ptr, dstu_ptr, dstv_ptr and width are declared as
  // input-only operands but are modified by the asm. GCC's extended-asm rules
  // do not allow that; they should probably be "+r" operands - confirm.
  4580. void MirrorSplitUVRow_MMI(const uint8_t* src_uv,
  4581. uint8_t* dst_u,
  4582. uint8_t* dst_v,
  4583. int width) {
  4584. uint64_t src0, src1, dest0, dest1;
  4585. const uint64_t mask0 = 0x00ff00ff00ff00ffULL;  // keeps the low byte of each halfword
  4586. const uint64_t mask1 = 0x1b;  // pshufh selector 00 01 10 11: reverse the 4 halfwords
  4587. const uint64_t shift = 0x08;
  4588. src_uv += (width - 1) << 1;  // point at the last 2-byte pair of the row
  4589. __asm__ volatile(
  4590. "1: \n\t"
  // src0 = the 8 bytes ending at src_ptr + 1 (the last four pairs).
  4591. "gsldlc1 %[src0], 1(%[src_ptr]) \n\t"
  4592. "gsldrc1 %[src0], -6(%[src_ptr]) \n\t"
  // src1 = the 8 bytes just before those (the four pairs preceding them).
  4593. "gsldlc1 %[src1], -7(%[src_ptr]) \n\t"
  4594. "gsldrc1 %[src1], -14(%[src_ptr]) \n\t"
  // Low bytes: isolate, reverse the halfword order, pack 2x4 halfwords to 8 bytes.
  4595. "and %[dest0], %[src0], %[mask0] \n\t"
  4596. "pshufh %[dest0], %[dest0], %[mask1] \n\t"
  4597. "and %[dest1], %[src1], %[mask0] \n\t"
  4598. "pshufh %[dest1], %[dest1], %[mask1] \n\t"
  4599. "packushb %[dest0], %[dest0], %[dest1] \n\t"
  4600. "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t"
  4601. "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t"
  // High bytes: shift each halfword right by 8, then the same reverse + pack.
  4602. "psrlh %[dest0], %[src0], %[shift] \n\t"
  4603. "pshufh %[dest0], %[dest0], %[mask1] \n\t"
  4604. "psrlh %[dest1], %[src1], %[shift] \n\t"
  4605. "pshufh %[dest1], %[dest1], %[mask1] \n\t"
  4606. "packushb %[dest0], %[dest0], %[dest1] \n\t"
  4607. "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t"
  4608. "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t"
  // Source moves back 16 bytes, both destinations forward 8 bytes.
  4609. "daddi %[src_ptr], %[src_ptr], -0x10 \n\t"
  4610. "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t"
  4611. "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t"
  4612. "daddi %[width], %[width], -0x08 \n\t"
  4613. "bnez %[width], 1b \n\t"
  4614. : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0),
  4615. [src1] "=&f"(src1)
  4616. : [src_ptr] "r"(src_uv), [dstu_ptr] "r"(dst_u), [dstv_ptr] "r"(dst_v),
  4617. [width] "r"(width), [mask0] "f"(mask0), [mask1] "f"(mask1),
  4618. [shift] "f"(shift)
  4619. : "memory");
  4620. }
  // Mirrors a row of 4-byte pixels: pixels are written to dst in reverse
  // order, with the byte order inside each pixel left unchanged.
  //   src: input row; read backwards starting at its last pixel.
  //   dst: output row; written forwards, 8 bytes (2 pixels) per iteration.
  //   width: number of pixels. The counter drops by 2 per iteration and the
  //          loop exits only when it is exactly zero, so callers presumably
  //          pass a positive even value - TODO confirm.
  // NOTE(review): src, dst and width are declared as input-only operands but
  // are modified by the asm. GCC's extended-asm rules do not allow that; they
  // should probably be "+r" operands - confirm.
  4621. void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
  4622. src += (width - 1) * 4;  // point at the last 4-byte pixel of the row
  4623. uint64_t temp = 0x0;  // initial value is unused: temp is an output-only operand
  4624. uint64_t shuff = 0x4e; // pshufh selector 01 00 11 10: swaps the two 32-bit halves
  4625. __asm__ volatile(
  4626. "1: \n\t"
  // Load the 8 bytes at src - 4 .. src + 3 (the current pixel and the one before it).
  4627. "gsldlc1 %[temp], 3(%[src]) \n\t"
  4628. "gsldrc1 %[temp], -4(%[src]) \n\t"
  4629. "pshufh %[temp], %[temp], %[shuff] \n\t"
  4630. "gssdrc1 %[temp], 0x0(%[dst]) \n\t"
  4631. "gssdlc1 %[temp], 0x7(%[dst]) \n\t"
  4632. "daddiu %[src], %[src], -0x08 \n\t"
  4633. "daddiu %[dst], %[dst], 0x08 \n\t"
  4634. "daddiu %[width], %[width], -0x02 \n\t"
  4635. "bnez %[width], 1b \n\t"
  4636. : [temp] "=&f"(temp)
  4637. : [src] "r"(src), [dst] "r"(dst), [width] "r"(width), [shuff] "f"(shuff)
  4638. : "memory");
  4639. }
  // De-interleaves a row of byte pairs: the even-offset byte of every pair is
  // written to dst_u and the odd-offset byte to dst_v.
  //   src_uv: interleaved input; 16 bytes are read per iteration.
  //   dst_u, dst_v: output planes; 8 bytes are written to each per iteration.
  //   width: number of pairs. The loop runs while the counter is > 0 after
  //          subtracting 8, so a width that is not a multiple of 8 still
  //          processes a full final group of 8 pairs.
  // NOTE(review): src_uv, dst_u, dst_v and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  4640. void SplitUVRow_MMI(const uint8_t* src_uv,
  4641. uint8_t* dst_u,
  4642. uint8_t* dst_v,
  4643. int width) {
  4644. uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the low byte of each halfword
  4645. uint64_t temp[4];
  4646. uint64_t shift = 0x08;
  4647. __asm__ volatile(
  4648. "1: \n\t"
  // t0 = bytes 0..7, t1 = bytes 8..15 of the interleaved input.
  4649. "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t"
  4650. "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t"
  4651. "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t"
  4652. "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t"
  // Low byte of each pair -> dst_u.
  4653. "and %[t2], %[t0], %[c0] \n\t"
  4654. "and %[t3], %[t1], %[c0] \n\t"
  4655. "packushb %[t2], %[t2], %[t3] \n\t"
  4656. "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t"
  4657. "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t"
  // High byte of each pair -> dst_v.
  4658. "psrlh %[t2], %[t0], %[shift] \n\t"
  4659. "psrlh %[t3], %[t1], %[shift] \n\t"
  4660. "packushb %[t2], %[t2], %[t3] \n\t"
  4661. "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t"
  4662. "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t"
  4663. "daddiu %[src_uv], %[src_uv], 16 \n\t"
  4664. "daddiu %[dst_u], %[dst_u], 8 \n\t"
  4665. "daddiu %[dst_v], %[dst_v], 8 \n\t"
  4666. "daddiu %[width], %[width], -8 \n\t"
  4667. "bgtz %[width], 1b \n\t"
  4668. "nop \n\t"  // presumably meant for the branch delay slot - TODO confirm
  4669. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
  4670. [t3] "=&f"(temp[3])
  4671. : [src_uv] "r"(src_uv), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
  4672. [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
  4673. : "memory");
  4674. }
  // Interleaves two byte planes into one row of pairs: dst_uv receives
  // src_u[0], src_v[0], src_u[1], src_v[1], ...
  //   src_u, src_v: input planes; 8 bytes are read from each per iteration.
  //   dst_uv: interleaved output; 16 bytes are written per iteration.
  //   width: number of pairs. The loop runs while the counter is > 0 after
  //          subtracting 8, so a width that is not a multiple of 8 still
  //          processes a full final group of 8 pairs.
  // NOTE(review): dst_uv, src_u, src_v and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  4675. void MergeUVRow_MMI(const uint8_t* src_u,
  4676. const uint8_t* src_v,
  4677. uint8_t* dst_uv,
  4678. int width) {
  4679. uint64_t temp[3];
  4680. __asm__ volatile(
  4681. "1: \n\t"
  4682. "gsldrc1 %[t0], 0x0(%[src_u]) \n\t"
  4683. "gsldlc1 %[t0], 0x7(%[src_u]) \n\t"
  4684. "gsldrc1 %[t1], 0x0(%[src_v]) \n\t"
  4685. "gsldlc1 %[t1], 0x7(%[src_v]) \n\t"
  // Interleave the low four bytes of each plane -> output bytes 0..7.
  4686. "punpcklbh %[t2], %[t0], %[t1] \n\t"
  4687. "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t"
  4688. "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t"
  // Interleave the high four bytes of each plane -> output bytes 8..15.
  4689. "punpckhbh %[t2], %[t0], %[t1] \n\t"
  4690. "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t"
  4691. "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t"
  4692. "daddiu %[src_u], %[src_u], 8 \n\t"
  4693. "daddiu %[src_v], %[src_v], 8 \n\t"
  4694. "daddiu %[dst_uv], %[dst_uv], 16 \n\t"
  4695. "daddiu %[width], %[width], -8 \n\t"
  4696. "bgtz %[width], 1b \n\t"
  4697. "nop \n\t"  // presumably meant for the branch delay slot - TODO confirm
  4698. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
  4699. : [dst_uv] "r"(dst_uv), [src_u] "r"(src_u), [src_v] "r"(src_v),
  4700. [width] "r"(width)
  4701. : "memory");
  4702. }
  // Splits a row of packed 3-byte pixels into three planes: byte 0 of each
  // pixel goes to dst_r, byte 1 to dst_g and byte 2 to dst_b.
  //   src_rgb: packed input; advanced by 12 bytes (4 pixels) per iteration.
  //   dst_r, dst_g, dst_b: output planes; 4 bytes written to each per
  //          iteration.
  //   width: number of pixels. The counter drops by 4 per iteration and the
  //          loop exits only when it is exactly zero, so callers presumably
  //          pass a positive multiple of 4 - TODO confirm.
  // NOTE(review): the fourth 32-bit load covers source offsets 0x09..0x0c,
  // i.e. one byte beyond the 12 bytes consumed per iteration. On the final
  // iteration this appears to read one byte past the end of the row -
  // confirm that callers guarantee the byte is readable.
  // NOTE(review): src_ptr, dstr_ptr, dstg_ptr, dstb_ptr and width are declared
  // as input-only operands but are modified by the asm. GCC's extended-asm
  // rules do not allow that; they should probably be "+r" operands - confirm.
  4703. void SplitRGBRow_MMI(const uint8_t* src_rgb,
  4704. uint8_t* dst_r,
  4705. uint8_t* dst_g,
  4706. uint8_t* dst_b,
  4707. int width) {
  4708. uint64_t src[4];
  4709. uint64_t dest_hi, dest_lo, dest;
  4710. __asm__ volatile(
  4711. "1: \n\t"
  // Four overlapping 4-byte loads at offsets 0, 3, 6 and 9: each register's
  // low three bytes are one pixel, the fourth byte belongs to the next one.
  4712. "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
  4713. "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
  4714. "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
  4715. "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
  // dest_lo = bytes of pixels 0 and 1 interleaved.
  4716. "punpcklbh %[dest_lo], %[src0], %[src1] \n\t"
  4717. "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t"
  4718. "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t"
  4719. "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t"
  4720. "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t"
  // dest_hi = bytes of pixels 2 and 3 interleaved.
  4721. "punpcklbh %[dest_hi], %[src2], %[src3] \n\t"
  // Low word of dest = byte 0 of pixels 0..3; high word = byte 1 of pixels 0..3.
  4722. "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t"
  4723. "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t"
  4724. "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t"
  // Move the high word down so the 32-bit store writes the byte-1 values.
  4725. "punpckhwd %[dest], %[dest], %[dest] \n\t"
  4726. "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t"
  4727. "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t"
  // Low word of dest = byte 2 of pixels 0..3.
  4728. "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t"
  4729. "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t"
  4730. "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t"
  4731. "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
  4732. "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t"
  4733. "daddiu %[dstg_ptr], %[dstg_ptr], 0x04 \n\t"
  4734. "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t"
  4735. "daddi %[width], %[width], -0x04 \n\t"
  4736. "bnez %[width], 1b \n\t"
  4737. : [src0] "=&f"(src[0]), [src1] "=&f"(src[1]), [src2] "=&f"(src[2]),
  4738. [src3] "=&f"(src[3]), [dest_hi] "=&f"(dest_hi),
  4739. [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
  4740. : [src_ptr] "r"(src_rgb), [dstr_ptr] "r"(dst_r), [dstg_ptr] "r"(dst_g),
  4741. [dstb_ptr] "r"(dst_b), [width] "r"(width)
  4742. : "memory");
  4743. }
  // Merges three byte planes into a row of packed 3-byte pixels laid out as
  // src_r[i], src_g[i], src_b[i].
  //   src_r, src_g, src_b: input planes; 8 bytes are read from each per
  //          iteration.
  //   dst_rgb: packed output; advanced by 24 bytes (8 pixels) per iteration.
  //   width: number of pixels. The counter drops by 8 per iteration and the
  //          loop exits only when it is exactly zero, so callers presumably
  //          pass a positive multiple of 8 - TODO confirm.
  // Each pixel is produced as a 4-byte group (r, g, b, 0) and stored with a
  // 32-bit store at a 3-byte stride, so the zero pad byte of one pixel is
  // overwritten by the first byte of the next pixel's store.
  // NOTE(review): the eighth store covers destination offsets 0x15..0x18, so
  // its pad byte lands at offset 24, one byte beyond the 24 bytes produced
  // per iteration. On the final iteration this appears to write one byte past
  // the end of the row - confirm that callers tolerate this.
  // NOTE(review): srcr_ptr, srcg_ptr, srcb_ptr, dst_ptr and width are declared
  // as input-only operands but are modified by the asm. GCC's extended-asm
  // rules do not allow that; they should probably be "+r" operands - confirm.
  4744. void MergeRGBRow_MMI(const uint8_t* src_r,
  4745. const uint8_t* src_g,
  4746. const uint8_t* src_b,
  4747. uint8_t* dst_rgb,
  4748. int width) {
  4749. uint64_t srcr, srcg, srcb, dest;
  4750. uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo;
  4751. const uint64_t temp = 0x0;  // zero source used to pad every pixel to 4 bytes
  4752. __asm__ volatile(
  4753. "1: \n\t"
  4754. "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t"
  4755. "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t"
  4756. "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t"
  4757. "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t"
  // srcrg_lo / srcrg_hi = (r, g) byte pairs for pixels 0..3 / 4..7.
  4758. "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t"
  4759. "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t"
  4760. "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t"
  4761. "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t"
  // srcbz_lo / srcbz_hi = (b, 0) byte pairs for pixels 0..3 / 4..7.
  4762. "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t"
  4763. "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t"
  // Pixels 0 and 1: low word stored at offset 0, high word at offset 3.
  4764. "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
  4765. "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t"
  4766. "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  4767. "punpckhwd %[dest], %[dest], %[dest] \n\t"
  4768. "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t"
  4769. "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t"
  // Pixels 2 and 3 at offsets 6 and 9.
  4770. "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
  4771. "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t"
  4772. "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t"
  4773. "punpckhwd %[dest], %[dest], %[dest] \n\t"
  4774. "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
  4775. "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t"
  // Pixels 4 and 5 at offsets 12 and 15.
  4776. "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
  4777. "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
  4778. "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
  4779. "punpckhwd %[dest], %[dest], %[dest] \n\t"
  4780. "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t"
  4781. "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
  // Pixels 6 and 7 at offsets 18 and 21; the last store also touches offset 24.
  4782. "punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
  4783. "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t"
  4784. "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t"
  4785. "punpckhwd %[dest], %[dest], %[dest] \n\t"
  4786. "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t"
  4787. "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t"
  4788. "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t"
  4789. "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t"
  4790. "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t"
  4791. "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t"
  4792. "daddi %[width], %[width], -0x08 \n\t"
  4793. "bnez %[width], 1b \n\t"
  4794. : [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb),
  4795. [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi),
  4796. [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi),
  4797. [srcbz_lo] "=&f"(srcbz_lo)
  4798. : [srcr_ptr] "r"(src_r), [srcg_ptr] "r"(src_g), [srcb_ptr] "r"(src_b),
  4799. [dst_ptr] "r"(dst_rgb), [width] "r"(width), [temp] "f"(temp)
  4800. : "memory");
  4801. }
  4802. // Filter 2 rows of YUY2 UV's (422) into U and V (420).
  // The row at src_yuy2 and the row at src_yuy2 + src_stride_yuy2 are averaged
  // byte-wise (pavgb); the odd-offset bytes of the result are then split
  // alternately into dst_u (first of each pair) and dst_v (second).
  //   src_yuy2: first input row; 32 bytes are read per iteration.
  //   src_stride_yuy2: byte offset from the first row to the second row.
  //   dst_u, dst_v: outputs; 8 bytes are written to each per iteration.
  //   width: the counter drops by 16 per iteration and the loop runs while
  //          it is > 0, so a partial final group is still processed as a
  //          full 32-byte group.
  // NOTE(review): src_yuy2, dst_u, dst_v and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  // NOTE(review): mov.s is a single-precision move, yet the following psrlh
  // uses all 64 bits of t1; this relies on the target copying the whole
  // register - confirm.
  4803. void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
  4804. int src_stride_yuy2,
  4805. uint8_t* dst_u,
  4806. uint8_t* dst_v,
  4807. int width) {
  4808. uint64_t c0 = 0xff00ff00ff00ff00;  // keeps the high byte of each halfword
  4809. uint64_t c1 = 0x00ff00ff00ff00ff;  // keeps the low byte of each halfword
  4810. uint64_t temp[3];
  4811. uint64_t data[4];
  4812. uint64_t shift = 0x08;
  4813. uint64_t src_stride = 0x0;  // scratch register: receives the second-row address
  4814. __asm__ volatile(
  4815. "1: \n\t"
  4816. "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
  4817. "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
  // src_stride = address of the same position in the second row.
  4818. "daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t"
  4819. "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
  4820. "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
  4821. "pavgb %[t0], %[t0], %[t1] \n\t"
  4822. "gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t"
  4823. "gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t"
  4824. "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
  4825. "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
  4826. "pavgb %[t1], %[t2], %[t1] \n\t"
  // Keep the odd-offset bytes of the 16 averaged bytes and pack them to 8 bytes.
  4827. "and %[t0], %[t0], %[c0] \n\t"
  4828. "and %[t1], %[t1], %[c0] \n\t"
  4829. "psrlh %[t0], %[t0], %[shift] \n\t"
  4830. "psrlh %[t1], %[t1], %[shift] \n\t"
  4831. "packushb %[t0], %[t0], %[t1] \n\t"
  4832. "mov.s %[t1], %[t0] \n\t"
  // d0 = even bytes of the packed result (for dst_u), d1 = odd bytes (for dst_v).
  4833. "and %[d0], %[t0], %[c1] \n\t"
  4834. "psrlh %[d1], %[t1], %[shift] \n\t"
  // Same sequence for source bytes 16..31, producing d2 and d3.
  4835. "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
  4836. "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
  4837. "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
  4838. "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
  4839. "pavgb %[t0], %[t0], %[t1] \n\t"
  4840. "gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t"
  4841. "gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t"
  4842. "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
  4843. "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
  4844. "pavgb %[t1], %[t2], %[t1] \n\t"
  4845. "and %[t0], %[t0], %[c0] \n\t"
  4846. "and %[t1], %[t1], %[c0] \n\t"
  4847. "psrlh %[t0], %[t0], %[shift] \n\t"
  4848. "psrlh %[t1], %[t1], %[shift] \n\t"
  4849. "packushb %[t0], %[t0], %[t1] \n\t"
  4850. "mov.s %[t1], %[t0] \n\t"
  4851. "and %[d2], %[t0], %[c1] \n\t"
  4852. "psrlh %[d3], %[t1], %[shift] \n\t"
  // Pack the two halves into 8 output bytes per plane and store them.
  4853. "packushb %[d0], %[d0], %[d2] \n\t"
  4854. "packushb %[d1], %[d1], %[d3] \n\t"
  4855. "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
  4856. "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
  4857. "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
  4858. "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
  4859. "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
  4860. "daddiu %[dst_u], %[dst_u], 8 \n\t"
  4861. "daddiu %[dst_v], %[dst_v], 8 \n\t"
  4862. "daddiu %[width], %[width], -16 \n\t"
  4863. "bgtz %[width], 1b \n\t"
  4864. "nop \n\t"  // presumably meant for the branch delay slot - TODO confirm
  4865. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
  4866. [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
  4867. [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride)
  4868. : [src_yuy2] "r"(src_yuy2), [src_stride_yuy2] "r"(src_stride_yuy2),
  4869. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  4870. [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
  4871. : "memory");
  4872. }
  4873. // Copy row of YUY2 UV's (422) into U and V (422).
  // The odd-offset bytes of the input are split alternately into dst_u (first
  // of each pair) and dst_v (second); no second row is read.
  //   src_yuy2: input row; 32 bytes are read per iteration.
  //   dst_u, dst_v: outputs; 8 bytes are written to each per iteration.
  //   width: the counter drops by 16 per iteration and the loop runs while
  //          it is > 0, so a partial final group is still processed as a
  //          full 32-byte group.
  // NOTE(review): src_yuy2, dst_u, dst_v and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  // NOTE(review): mov.s is a single-precision move, yet the following psrlh
  // uses all 64 bits of t1; this relies on the target copying the whole
  // register - confirm.
  4874. void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
  4875. uint8_t* dst_u,
  4876. uint8_t* dst_v,
  4877. int width) {
  4878. uint64_t c0 = 0xff00ff00ff00ff00;  // keeps the high byte of each halfword
  4879. uint64_t c1 = 0x00ff00ff00ff00ff;  // keeps the low byte of each halfword
  4880. uint64_t temp[2];
  4881. uint64_t data[4];
  4882. uint64_t shift = 0x08;
  4883. __asm__ volatile(
  4884. "1: \n\t"
  4885. "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
  4886. "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
  4887. "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
  4888. "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
  // Keep the odd-offset bytes of source bytes 0..15 and pack them to 8 bytes.
  4889. "and %[t0], %[t0], %[c0] \n\t"
  4890. "and %[t1], %[t1], %[c0] \n\t"
  4891. "psrlh %[t0], %[t0], %[shift] \n\t"
  4892. "psrlh %[t1], %[t1], %[shift] \n\t"
  4893. "packushb %[t0], %[t0], %[t1] \n\t"
  4894. "mov.s %[t1], %[t0] \n\t"
  // d0 = even bytes of the packed result (for dst_u), d1 = odd bytes (for dst_v).
  4895. "and %[d0], %[t0], %[c1] \n\t"
  4896. "psrlh %[d1], %[t1], %[shift] \n\t"
  // Same sequence for source bytes 16..31, producing d2 and d3.
  4897. "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
  4898. "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
  4899. "gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t"
  4900. "gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t"
  4901. "and %[t0], %[t0], %[c0] \n\t"
  4902. "and %[t1], %[t1], %[c0] \n\t"
  4903. "psrlh %[t0], %[t0], %[shift] \n\t"
  4904. "psrlh %[t1], %[t1], %[shift] \n\t"
  4905. "packushb %[t0], %[t0], %[t1] \n\t"
  4906. "mov.s %[t1], %[t0] \n\t"
  4907. "and %[d2], %[t0], %[c1] \n\t"
  4908. "psrlh %[d3], %[t1], %[shift] \n\t"
  // Pack the two halves into 8 output bytes per plane and store them.
  4909. "packushb %[d0], %[d0], %[d2] \n\t"
  4910. "packushb %[d1], %[d1], %[d3] \n\t"
  4911. "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
  4912. "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
  4913. "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
  4914. "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
  4915. "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
  4916. "daddiu %[dst_u], %[dst_u], 8 \n\t"
  4917. "daddiu %[dst_v], %[dst_v], 8 \n\t"
  4918. "daddiu %[width], %[width], -16 \n\t"
  4919. "bgtz %[width], 1b \n\t"
  4920. "nop \n\t"  // presumably meant for the branch delay slot - TODO confirm
  4921. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
  4922. [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
  4923. : [src_yuy2] "r"(src_yuy2), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
  4924. [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
  4925. : "memory");
  4926. }
  4927. // Copy row of YUY2 Y's (422) into Y (420/422).
  // Every even-offset byte of the input is copied to dst_y.
  //   src_yuy2: input row; 16 bytes are read per iteration.
  //   dst_y: output; 8 bytes are written per iteration.
  //   width: the counter drops by 8 per iteration and the loop runs while it
  //          is > 0, so a partial final group is still processed as a full
  //          16-byte group.
  // NOTE(review): src_yuy2, dst_y and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  4928. void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  4929. uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the low byte of each halfword
  4930. uint64_t temp[2];
  4931. __asm__ volatile(
  4932. "1: \n\t"
  4933. "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
  4934. "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
  4935. "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
  4936. "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
  // Mask to the low byte of each halfword, then pack 2x4 halfwords to 8 bytes.
  4937. "and %[t0], %[t0], %[c0] \n\t"
  4938. "and %[t1], %[t1], %[c0] \n\t"
  4939. "packushb %[t0], %[t0], %[t1] \n\t"
  4940. "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
  4941. "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
  4942. "daddiu %[src_yuy2], %[src_yuy2], 16 \n\t"
  4943. "daddiu %[dst_y], %[dst_y], 8 \n\t"
  4944. "daddiu %[width], %[width], -8 \n\t"
  4945. "bgtz %[width], 1b \n\t"
  4946. "nop \n\t"  // presumably meant for the branch delay slot - TODO confirm
  4947. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1])
  4948. : [src_yuy2] "r"(src_yuy2), [dst_y] "r"(dst_y), [width] "r"(width),
  4949. [c0] "f"(c0)
  4950. : "memory");
  4951. }
  4952. // Filter 2 rows of UYVY UV's (422) into U and V (420).
  // The row at src_uyvy and the row at src_uyvy + src_stride_uyvy are averaged
  // byte-wise (pavgb); the even-offset bytes of the result are then split
  // alternately into dst_u (first of each pair) and dst_v (second).
  //   src_uyvy: first input row; 32 bytes are read per iteration.
  //   src_stride_uyvy: byte offset from the first row to the second row.
  //   dst_u, dst_v: outputs; 8 bytes are written to each per iteration.
  //   width: the counter drops by 16 per iteration and the loop runs while
  //          it is > 0, so a partial final group is still processed as a
  //          full 32-byte group.
  // NOTE(review): src_uyvy, dst_u, dst_v and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  // NOTE(review): mov.s is a single-precision move, yet the following psrlh
  // uses all 64 bits of t1; this relies on the target copying the whole
  // register - confirm.
  4953. void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
  4954. int src_stride_uyvy,
  4955. uint8_t* dst_u,
  4956. uint8_t* dst_v,
  4957. int width) {
  4958. // Output a row of UV values.
  4959. uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the low byte of each halfword
  4960. uint64_t temp[3];
  4961. uint64_t data[4];
  4962. uint64_t shift = 0x08;
  4963. uint64_t src_stride = 0x0;  // scratch register: receives the second-row address
  4964. __asm__ volatile(
  4965. "1: \n\t"
  4966. "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
  4967. "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
  // src_stride = address of the same position in the second row.
  4968. "daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t"
  4969. "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
  4970. "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
  4971. "pavgb %[t0], %[t0], %[t1] \n\t"
  4972. "gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t"
  4973. "gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t"
  4974. "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
  4975. "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
  4976. "pavgb %[t1], %[t2], %[t1] \n\t"
  // Keep the even-offset bytes of the 16 averaged bytes and pack them to 8 bytes.
  4977. "and %[t0], %[t0], %[c0] \n\t"
  4978. "and %[t1], %[t1], %[c0] \n\t"
  4979. "packushb %[t0], %[t0], %[t1] \n\t"
  4980. "mov.s %[t1], %[t0] \n\t"
  // d0 = even bytes of the packed result (for dst_u), d1 = odd bytes (for dst_v).
  4981. "and %[d0], %[t0], %[c0] \n\t"
  4982. "psrlh %[d1], %[t1], %[shift] \n\t"
  // Same sequence for source bytes 16..31, producing d2 and d3.
  4983. "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
  4984. "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
  4985. "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
  4986. "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
  4987. "pavgb %[t0], %[t0], %[t1] \n\t"
  4988. "gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t"
  4989. "gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t"
  4990. "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
  4991. "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
  4992. "pavgb %[t1], %[t2], %[t1] \n\t"
  4993. "and %[t0], %[t0], %[c0] \n\t"
  4994. "and %[t1], %[t1], %[c0] \n\t"
  4995. "packushb %[t0], %[t0], %[t1] \n\t"
  4996. "mov.s %[t1], %[t0] \n\t"
  4997. "and %[d2], %[t0], %[c0] \n\t"
  4998. "psrlh %[d3], %[t1], %[shift] \n\t"
  // Pack the two halves into 8 output bytes per plane and store them.
  4999. "packushb %[d0], %[d0], %[d2] \n\t"
  5000. "packushb %[d1], %[d1], %[d3] \n\t"
  5001. "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
  5002. "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
  5003. "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
  5004. "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
  5005. "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
  5006. "daddiu %[dst_u], %[dst_u], 8 \n\t"
  5007. "daddiu %[dst_v], %[dst_v], 8 \n\t"
  5008. "daddiu %[width], %[width], -16 \n\t"
  5009. "bgtz %[width], 1b \n\t"
  5010. "nop \n\t"  // presumably meant for the branch delay slot - TODO confirm
  5011. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
  5012. [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
  5013. [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride)
  5014. : [src_uyvy] "r"(src_uyvy), [src_stride_uyvy] "r"(src_stride_uyvy),
  5015. [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
  5016. [c0] "f"(c0), [shift] "f"(shift)
  5017. : "memory");
  5018. }
  5019. // Copy row of UYVY UV's (422) into U and V (422).
  // The even-offset bytes of the input are split alternately into dst_u
  // (first of each pair) and dst_v (second); no second row is read.
  //   src_uyvy: input row; 32 bytes are read per iteration.
  //   dst_u, dst_v: outputs; 8 bytes are written to each per iteration.
  //   width: the counter drops by 16 per iteration and the loop runs while
  //          it is > 0, so a partial final group is still processed as a
  //          full 32-byte group.
  // NOTE(review): src_uyvy, dst_u, dst_v and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  // NOTE(review): mov.s is a single-precision move, yet the following psrlh
  // uses all 64 bits of t1; this relies on the target copying the whole
  // register - confirm.
  5020. void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
  5021. uint8_t* dst_u,
  5022. uint8_t* dst_v,
  5023. int width) {
  5024. // Output a row of UV values.
  5025. uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the low byte of each halfword
  5026. uint64_t temp[2];
  5027. uint64_t data[4];
  5028. uint64_t shift = 0x08;
  5029. __asm__ volatile(
  5030. "1: \n\t"
  5031. "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
  5032. "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
  5033. "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
  5034. "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
  // Keep the even-offset bytes of source bytes 0..15 and pack them to 8 bytes.
  5035. "and %[t0], %[t0], %[c0] \n\t"
  5036. "and %[t1], %[t1], %[c0] \n\t"
  5037. "packushb %[t0], %[t0], %[t1] \n\t"
  5038. "mov.s %[t1], %[t0] \n\t"
  // d0 = even bytes of the packed result (for dst_u), d1 = odd bytes (for dst_v).
  5039. "and %[d0], %[t0], %[c0] \n\t"
  5040. "psrlh %[d1], %[t1], %[shift] \n\t"
  // Same sequence for source bytes 16..31, producing d2 and d3.
  5041. "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
  5042. "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
  5043. "gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t"
  5044. "gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t"
  5045. "and %[t0], %[t0], %[c0] \n\t"
  5046. "and %[t1], %[t1], %[c0] \n\t"
  5047. "packushb %[t0], %[t0], %[t1] \n\t"
  5048. "mov.s %[t1], %[t0] \n\t"
  5049. "and %[d2], %[t0], %[c0] \n\t"
  5050. "psrlh %[d3], %[t1], %[shift] \n\t"
  // Pack the two halves into 8 output bytes per plane and store them.
  5051. "packushb %[d0], %[d0], %[d2] \n\t"
  5052. "packushb %[d1], %[d1], %[d3] \n\t"
  5053. "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
  5054. "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
  5055. "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
  5056. "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
  5057. "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
  5058. "daddiu %[dst_u], %[dst_u], 8 \n\t"
  5059. "daddiu %[dst_v], %[dst_v], 8 \n\t"
  5060. "daddiu %[width], %[width], -16 \n\t"
  5061. "bgtz %[width], 1b \n\t"
  5062. "nop \n\t"  // presumably meant for the branch delay slot - TODO confirm
  5063. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
  5064. [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
  5065. : [src_uyvy] "r"(src_uyvy), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
  5066. [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
  5067. : "memory");
  5068. }
  5069. // Copy row of UYVY Y's (422) into Y (420/422).
  // Every odd-offset byte of the input is copied to dst_y.
  //   src_uyvy: input row; 16 bytes are read per iteration.
  //   dst_y: output; 8 bytes are written per iteration.
  //   width: the counter drops by 8 per iteration and the loop runs while it
  //          is > 0, so a partial final group is still processed as a full
  //          16-byte group.
  // NOTE(review): src_uyvy, dst_y and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  5070. void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  5071. // Output a row of Y values.
  5072. uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the low byte of each halfword
  5073. uint64_t shift = 0x08;
  5074. uint64_t temp[2];
  5075. __asm__ volatile(
  5076. "1: \n\t"
  5077. "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
  5078. "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
  5079. "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
  5080. "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
  // Shift each 64-bit value right by 8 bits so the odd-offset bytes land in
  // the low byte of every halfword, then mask off everything else.
  5081. "dsrl %[t0], %[t0], %[shift] \n\t"
  5082. "dsrl %[t1], %[t1], %[shift] \n\t"
  5083. "and %[t0], %[t0], %[c0] \n\t"
  5084. "and %[t1], %[t1], %[c0] \n\t"
  // NOTE(review): this repeats the previous instruction and has no further
  // effect; it looks like a redundant duplicate.
  5085. "and %[t1], %[t1], %[c0] \n\t"
  5086. "packushb %[t0], %[t0], %[t1] \n\t"
  5087. "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
  5088. "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
  5089. "daddiu %[src_uyvy], %[src_uyvy], 16 \n\t"
  5090. "daddiu %[dst_y], %[dst_y], 8 \n\t"
  5091. "daddiu %[width], %[width], -8 \n\t"
  5092. "bgtz %[width], 1b \n\t"
  5093. "nop \n\t"  // presumably meant for the branch delay slot - TODO confirm
  5094. : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1])
  5095. : [src_uyvy] "r"(src_uyvy), [dst_y] "r"(dst_y), [width] "r"(width),
  5096. [c0] "f"(c0), [shift] "f"(shift)
  5097. : "memory");
  5098. }
  5099. // Blend src_argb0 over src_argb1 and store to dst_argb.
  5100. // dst_argb may be src_argb0 or src_argb1.
  5101. // This code mimics the SSSE3 version for better testability.
  // Per 4-byte pixel, with a0 = byte 3 of the src_argb0 pixel, each byte c is
  // computed in a 16-bit lane as
  //   ((src_argb1[c] * (255 - a0)) >> 8) + src_argb0[c]
  // using a saturating add, then packed back to bytes with unsigned
  // saturation. Byte 3 of every output pixel is then forced to 0xFF.
  //   width: number of pixels. Two pixels (8 bytes) are processed per
  //          iteration and the loop exits only when the counter is exactly
  //          zero, so callers presumably pass a positive even value - TODO
  //          confirm.
  // NOTE(review): src0_ptr, src1_ptr, dst_ptr and width are declared as
  // input-only operands but are modified by the asm. GCC's extended-asm rules
  // do not allow that; they should probably be "+r" operands - confirm.
  5102. void ARGBBlendRow_MMI(const uint8_t* src_argb0,
  5103. const uint8_t* src_argb1,
  5104. uint8_t* dst_argb,
  5105. int width) {
  5106. uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi,
  5107. dest_lo;
  5108. const uint64_t mask0 = 0x0;  // zero source for widening bytes to halfwords
  5109. const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL;  // bytes 0..2 of each pixel
  5110. const uint64_t mask2 = 0x00FF00FF00FF00FFULL;  // 255 in every halfword
  5111. const uint64_t mask3 = 0xFF;  // pshufh selector 11 11 11 11: broadcast halfword 3
  5112. const uint64_t mask4 = ~mask1;  // byte 3 of each pixel
  5113. const uint64_t shift = 0x08;
  5114. __asm__ volatile(
  5115. "1: \n\t"
  5116. "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
  5117. "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
  // First pixel of the pair, widened to four 16-bit lanes.
  5118. "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
  5119. "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
  5120. "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
  5121. "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
  // alpha = (255 - a0) replicated into all four lanes.
  5122. "psubush %[alpha], %[mask2], %[src0_lo] \n\t"
  5123. "pshufh %[alpha], %[alpha], %[mask3] \n\t"
  5124. "pmullh %[dest_lo], %[src1_lo], %[alpha] \n\t"
  5125. "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
  5126. "paddush %[dest_lo], %[dest_lo], %[src0_lo] \n\t"
  // Second pixel of the pair, same computation.
  5127. "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
  5128. "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
  5129. "psubush %[alpha], %[mask2], %[src0_hi] \n\t"
  5130. "pshufh %[alpha], %[alpha], %[mask3] \n\t"
  5131. "pmullh %[dest_hi], %[src1_hi], %[alpha] \n\t"
  5132. "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
  5133. "paddush %[dest_hi], %[dest_hi], %[src0_hi] \n\t"
  5134. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  // Clear byte 3 of each pixel, then set it to 0xFF.
  5135. "and %[dest], %[dest], %[mask1] \n\t"
  5136. "or %[dest], %[dest], %[mask4] \n\t"
  5137. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5138. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5139. "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
  5140. "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
  5141. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  5142. "daddi %[width], %[width], -0x02 \n\t"
  5143. "bnez %[width], 1b \n\t"
  5144. : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha),
  5145. [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
  5146. [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
  5147. [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo)
  5148. : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
  5149. [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), [mask1] "f"(mask1),
  5150. [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4),
  5151. [shift] "f"(shift), [width] "r"(width)
  5152. : "memory");
  5153. }
  // Blends two byte planes using a per-byte alpha plane. For every byte i the
  // result is computed in a 16-bit lane as
  //   (src0[i] * alpha[i] + src1[i] * (255 - alpha[i]) + 255) >> 8
  // with saturating adds, then packed back to bytes with unsigned saturation.
  //   src0, src1, alpha: input planes; 8 bytes are read from each per
  //          iteration.
  //   dst: output plane; 8 bytes are written per iteration.
  //   width: number of bytes. The counter drops by 8 per iteration and the
  //          loop exits only when it is exactly zero, so callers presumably
  //          pass a positive multiple of 8 - TODO confirm.
  // NOTE(review): src0_ptr, src1_ptr, alpha_ptr, dst_ptr and width are
  // declared as input-only operands but are modified by the asm. GCC's
  // extended-asm rules do not allow that; they should probably be "+r"
  // operands - confirm.
  5154. void BlendPlaneRow_MMI(const uint8_t* src0,
  5155. const uint8_t* src1,
  5156. const uint8_t* alpha,
  5157. uint8_t* dst,
  5158. int width) {
  5159. uint64_t source0, source1, dest, alph;
  5160. uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi,
  5161. dest_lo;
  5162. uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi;
  5163. const uint64_t mask0 = 0x0;  // zero source for widening bytes to halfwords
  5164. const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL;  // 255 in every byte
  5165. const uint64_t mask2 = 0x00FF00FF00FF00FFULL;  // 255 in every halfword (rounding term)
  5166. const uint64_t shift = 0x08;
  5167. __asm__ volatile(
  5168. "1: \n\t"
  // Load 8 bytes of each source and widen them to 16-bit lanes (lo/hi halves).
  5169. "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
  5170. "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
  5171. "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
  5172. "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
  5173. "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
  5174. "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
  5175. "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
  5176. "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
  5177. "gsldlc1 %[alpha], 0x07(%[alpha_ptr]) \n\t"
  5178. "gsldrc1 %[alpha], 0x00(%[alpha_ptr]) \n\t"
  // alpha_r = 255 - alpha for every byte.
  5179. "psubusb %[alpha_r], %[mask1], %[alpha] \n\t"
  5180. "punpcklbh %[alpha_lo], %[alpha], %[mask0] \n\t"
  5181. "punpckhbh %[alpha_hi], %[alpha], %[mask0] \n\t"
  5182. "punpcklbh %[alpha_rlo], %[alpha_r], %[mask0] \n\t"
  5183. "punpckhbh %[alpha_rhi], %[alpha_r], %[mask0] \n\t"
  // Low four bytes: weighted sum, add 255, shift right by 8.
  5184. "pmullh %[dest_lo], %[src0_lo], %[alpha_lo] \n\t"
  5185. "pmullh %[dest], %[src1_lo], %[alpha_rlo] \n\t"
  5186. "paddush %[dest_lo], %[dest_lo], %[dest] \n\t"
  5187. "paddush %[dest_lo], %[dest_lo], %[mask2] \n\t"
  5188. "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
  // High four bytes: same computation.
  5189. "pmullh %[dest_hi], %[src0_hi], %[alpha_hi] \n\t"
  5190. "pmullh %[dest], %[src1_hi], %[alpha_rhi] \n\t"
  5191. "paddush %[dest_hi], %[dest_hi], %[dest] \n\t"
  5192. "paddush %[dest_hi], %[dest_hi], %[mask2] \n\t"
  5193. "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
  5194. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  5195. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5196. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5197. "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
  5198. "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
  5199. "daddiu %[alpha_ptr], %[alpha_ptr], 0x08 \n\t"
  5200. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  5201. "daddi %[width], %[width], -0x08 \n\t"
  5202. "bnez %[width], 1b \n\t"
  5203. : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph),
  5204. [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
  5205. [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
  5206. [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo),
  5207. [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
  5208. [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi),
  5209. [alpha_r] "=&f"(alpha_rev)
  5210. : [src0_ptr] "r"(src0), [src1_ptr] "r"(src1), [alpha_ptr] "r"(alpha),
  5211. [dst_ptr] "r"(dst), [mask0] "f"(mask0), [mask1] "f"(mask1),
  5212. [mask2] "f"(mask2), [shift] "f"(shift), [width] "r"(width)
  5213. : "memory");
  5214. }
  5215. // Multiply source RGB by alpha and store to destination.
  5216. // This code mimics the SSSE3 version for better testability.
  // Per 4-byte pixel, every byte c is widened to a halfword holding the byte
  // twice (c * 0x0101), multiplied by the pixel's byte 3 widened the same way,
  // and the high 16 bits of that product are shifted right by a further 8.
  // Bytes 0..2 of the output take this result; byte 3 is copied unchanged
  // from the source pixel.
  //   width: number of pixels. Two pixels (8 bytes) are processed per
  //          iteration and the loop exits only when the counter is exactly
  //          zero, so callers presumably pass a positive even value - TODO
  //          confirm.
  // NOTE(review): src_ptr, dst_ptr and width are declared as input-only
  // operands but are modified by the asm. GCC's extended-asm rules do not
  // allow that; they should probably be "+r" operands - confirm.
  5217. void ARGBAttenuateRow_MMI(const uint8_t* src_argb,
  5218. uint8_t* dst_argb,
  5219. int width) {
  5220. uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha;
  5221. const uint64_t mask0 = 0xFF;  // pshufh selector 11 11 11 11: broadcast halfword 3
  5222. const uint64_t mask1 = 0xFF000000FF000000ULL;  // byte 3 of each pixel
  5223. const uint64_t mask2 = ~mask1;  // bytes 0..2 of each pixel
  5224. const uint64_t shift = 0x08;
  5225. __asm__ volatile(
  5226. "1: \n\t"
  5227. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  5228. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  // Interleaving src with itself duplicates every byte into a halfword.
  5229. "punpcklbh %[src_lo], %[src], %[src] \n\t"
  5230. "punpckhbh %[src_hi], %[src], %[src] \n\t"
  // First pixel: broadcast its byte-3 halfword, multiply, keep the high part.
  5231. "pshufh %[alpha], %[src_lo], %[mask0] \n\t"
  5232. "pmulhuh %[dest_lo], %[alpha], %[src_lo] \n\t"
  5233. "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
  // Second pixel: same computation.
  5234. "pshufh %[alpha], %[src_hi], %[mask0] \n\t"
  5235. "pmulhuh %[dest_hi], %[alpha], %[src_hi] \n\t"
  5236. "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
  5237. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  // Combine the scaled bytes 0..2 with the original byte 3.
  5238. "and %[dest], %[dest], %[mask2] \n\t"
  5239. "and %[src], %[src], %[mask1] \n\t"
  5240. "or %[dest], %[dest], %[src] \n\t"
  5241. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5242. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5243. "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
  5244. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  5245. "daddi %[width], %[width], -0x02 \n\t"
  5246. "bnez %[width], 1b \n\t"
  5247. : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
  5248. [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
  5249. [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha)
  5250. : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
  5251. [mask1] "f"(mask1), [mask2] "f"(mask2), [shift] "f"(shift),
  5252. [width] "r"(width)
  5253. : "memory");
  5254. }
  // Builds one row of a cumulative-sum table. For every 4-byte group of the
  // input row, each of its four bytes is added to a running 32-bit sum for
  // that byte position; the four running sums plus the four int32 values at
  // the same position in previous_cumsum are written to cumsum.
  //   row: input bytes; 4 bytes are consumed per iteration.
  //   cumsum: output; four int32 values (16 bytes) are written per iteration.
  //   previous_cumsum: input; four int32 values are read per iteration.
  //   width: number of 4-byte groups. The counter drops by 1 per iteration
  //          and the loop exits only when it is exactly zero, so callers
  //          presumably pass a positive value - TODO confirm.
  // NOTE(review): row_ptr, pre_ptr, dst_ptr and width are declared as
  // input-only operands but are modified by the asm. GCC's extended-asm rules
  // do not allow that; they should probably be "+r" operands - confirm.
  // NOTE(review): the [dest] output operand is never referenced by the asm
  // template, and row_sum is zeroed both by its initializer and by the two
  // xor instructions.
  5255. void ComputeCumulativeSumRow_MMI(const uint8_t* row,
  5256. int32_t* cumsum,
  5257. const int32_t* previous_cumsum,
  5258. int width) {
  5259. int64_t row_sum[2] = {0, 0};  // two registers, each holding two 32-bit running sums
  5260. uint64_t src, dest0, dest1, presrc0, presrc1, dest;
  5261. const uint64_t mask = 0x0;  // zero source for widening
  5262. __asm__ volatile(
  5263. "xor %[row_sum0], %[row_sum0], %[row_sum0] \n\t"
  5264. "xor %[row_sum1], %[row_sum1], %[row_sum1] \n\t"
  5265. "1: \n\t"
  // Load 4 bytes and widen them: bytes -> halfwords -> two pairs of 32-bit words.
  5266. "gslwlc1 %[src], 0x03(%[row_ptr]) \n\t"
  5267. "gslwrc1 %[src], 0x00(%[row_ptr]) \n\t"
  5268. "punpcklbh %[src], %[src], %[mask] \n\t"
  5269. "punpcklhw %[dest0], %[src], %[mask] \n\t"
  5270. "punpckhhw %[dest1], %[src], %[mask] \n\t"
  // Accumulate into the running sums.
  5271. "paddw %[row_sum0], %[row_sum0], %[dest0] \n\t"
  5272. "paddw %[row_sum1], %[row_sum1], %[dest1] \n\t"
  // Add the four values from previous_cumsum and store the result.
  5273. "gsldlc1 %[presrc0], 0x07(%[pre_ptr]) \n\t"
  5274. "gsldrc1 %[presrc0], 0x00(%[pre_ptr]) \n\t"
  5275. "gsldlc1 %[presrc1], 0x0f(%[pre_ptr]) \n\t"
  5276. "gsldrc1 %[presrc1], 0x08(%[pre_ptr]) \n\t"
  5277. "paddw %[dest0], %[row_sum0], %[presrc0] \n\t"
  5278. "paddw %[dest1], %[row_sum1], %[presrc1] \n\t"
  5279. "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
  5280. "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
  5281. "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
  5282. "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
  5283. "daddiu %[row_ptr], %[row_ptr], 0x04 \n\t"
  5284. "daddiu %[pre_ptr], %[pre_ptr], 0x10 \n\t"
  5285. "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
  5286. "daddi %[width], %[width], -0x01 \n\t"
  5287. "bnez %[width], 1b \n\t"
  5288. : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
  5289. [dest1] "=&f"(dest1), [row_sum0] "+&f"(row_sum[0]),
  5290. [row_sum1] "+&f"(row_sum[1]), [presrc0] "=&f"(presrc0),
  5291. [presrc1] "=&f"(presrc1)
  5292. : [row_ptr] "r"(row), [pre_ptr] "r"(previous_cumsum),
  5293. [dst_ptr] "r"(cumsum), [width] "r"(width), [mask] "f"(mask)
  5294. : "memory");
  5295. }
  5296. // C version 2x2 -> 2x1.
  5297. void InterpolateRow_MMI(uint8_t* dst_ptr,
  5298. const uint8_t* src_ptr,
  5299. ptrdiff_t src_stride,
  5300. int width,
  5301. int source_y_fraction) {
  5302. if (source_y_fraction == 0) {
  5303. __asm__ volatile(
  5304. "1: \n\t"
  5305. "ld $t0, 0x0(%[src_ptr]) \n\t"
  5306. "sd $t0, 0x0(%[dst_ptr]) \n\t"
  5307. "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
  5308. "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
  5309. "daddiu %[width], %[width], -8 \n\t"
  5310. "bgtz %[width], 1b \n\t"
  5311. "nop \n\t"
  5312. :
  5313. : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [width] "r"(width)
  5314. : "memory");
  5315. return;
  5316. }
  5317. if (source_y_fraction == 128) {
  5318. uint64_t uv = 0x0;
  5319. uint64_t uv_stride = 0x0;
  5320. __asm__ volatile(
  5321. "1: \n\t"
  5322. "gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t"
  5323. "gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t"
  5324. "daddu $t0, %[src_ptr], %[stride] \n\t"
  5325. "gsldrc1 %[uv_stride], 0x0($t0) \n\t"
  5326. "gsldlc1 %[uv_stride], 0x7($t0) \n\t"
  5327. "pavgb %[uv], %[uv], %[uv_stride] \n\t"
  5328. "gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t"
  5329. "gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t"
  5330. "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
  5331. "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
  5332. "daddiu %[width], %[width], -8 \n\t"
  5333. "bgtz %[width], 1b \n\t"
  5334. "nop \n\t"
  5335. : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride)
  5336. : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(width),
  5337. [stride] "r"((int64_t)src_stride)
  5338. : "memory");
  5339. return;
  5340. }
  5341. const uint8_t* src_ptr1 = src_ptr + src_stride;
  5342. uint64_t temp;
  5343. uint64_t data[4];
  5344. uint64_t zero = 0x0;
  5345. uint64_t c0 = 0x0080008000800080;
  5346. uint64_t fy0 = 0x0100010001000100;
  5347. uint64_t shift = 0x8;
  5348. __asm__ volatile(
  5349. "pshufh %[fy1], %[fy1], %[zero] \n\t"
  5350. "psubh %[fy0], %[fy0], %[fy1] \n\t"
  5351. "1: \n\t"
  5352. "gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t"
  5353. "gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t"
  5354. "punpcklbh %[d0], %[t0], %[zero] \n\t"
  5355. "punpckhbh %[d1], %[t0], %[zero] \n\t"
  5356. "gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t"
  5357. "gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t"
  5358. "punpcklbh %[d2], %[t0], %[zero] \n\t"
  5359. "punpckhbh %[d3], %[t0], %[zero] \n\t"
  5360. "pmullh %[d0], %[d0], %[fy0] \n\t"
  5361. "pmullh %[d2], %[d2], %[fy1] \n\t"
  5362. "paddh %[d0], %[d0], %[d2] \n\t"
  5363. "paddh %[d0], %[d0], %[c0] \n\t"
  5364. "psrlh %[d0], %[d0], %[shift] \n\t"
  5365. "pmullh %[d1], %[d1], %[fy0] \n\t"
  5366. "pmullh %[d3], %[d3], %[fy1] \n\t"
  5367. "paddh %[d1], %[d1], %[d3] \n\t"
  5368. "paddh %[d1], %[d1], %[c0] \n\t"
  5369. "psrlh %[d1], %[d1], %[shift] \n\t"
  5370. "packushb %[d0], %[d0], %[d1] \n\t"
  5371. "gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t"
  5372. "gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t"
  5373. "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
  5374. "daddiu %[src_ptr1], %[src_ptr1], 8 \n\t"
  5375. "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
  5376. "daddiu %[width], %[width], -8 \n\t"
  5377. "bgtz %[width], 1b \n\t"
  5378. "nop \n\t"
  5379. : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]),
  5380. [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
  5381. : [src_ptr] "r"(src_ptr), [src_ptr1] "r"(src_ptr1),
  5382. [dst_ptr] "r"(dst_ptr), [width] "r"(width),
  5383. [fy1] "f"(source_y_fraction), [fy0] "f"(fy0), [c0] "f"(c0),
  5384. [shift] "f"(shift), [zero] "f"(zero)
  5385. : "memory");
  5386. }
  5387. // Use first 4 shuffler values to reorder ARGB channels.
  5388. void ARGBShuffleRow_MMI(const uint8_t* src_argb,
  5389. uint8_t* dst_argb,
  5390. const uint8_t* shuffler,
  5391. int width) {
  5392. uint64_t source, dest0, dest1, dest;
  5393. const uint64_t mask0 = 0x0;
  5394. const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) |
  5395. ((shuffler[2] & 0x03) << 4) |
  5396. ((shuffler[3] & 0x03) << 6);
  5397. __asm__ volatile(
  5398. "1: \n\t"
  5399. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  5400. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  5401. "punpcklbh %[dest0], %[src], %[mask0] \n\t"
  5402. "pshufh %[dest0], %[dest0], %[mask1] \n\t"
  5403. "punpckhbh %[dest1], %[src], %[mask0] \n\t"
  5404. "pshufh %[dest1], %[dest1], %[mask1] \n\t"
  5405. "packushb %[dest], %[dest0], %[dest1] \n\t"
  5406. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5407. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5408. "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
  5409. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  5410. "daddi %[width], %[width], -0x02 \n\t"
  5411. "bnez %[width], 1b \n\t"
  5412. : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
  5413. [dest1] "=&f"(dest1)
  5414. : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
  5415. [mask1] "f"(mask1), [width] "r"(width)
  5416. : "memory");
  5417. }
  5418. void I422ToYUY2Row_MMI(const uint8_t* src_y,
  5419. const uint8_t* src_u,
  5420. const uint8_t* src_v,
  5421. uint8_t* dst_frame,
  5422. int width) {
  5423. uint64_t temp[3];
  5424. uint64_t vu = 0x0;
  5425. __asm__ volatile(
  5426. "1: \n\t"
  5427. "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i]
  5428. "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i]
  5429. "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i]
  5430. "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i]
  5431. "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i]
  5432. "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i]
  5433. "punpcklbh %[vu], %[tu], %[tv] \n\t" // g
  5434. "punpcklbh %[tu], %[ty], %[vu] \n\t" // g
  5435. "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t"
  5436. "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t"
  5437. "punpckhbh %[tu], %[ty], %[vu] \n\t" // g
  5438. "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
  5439. "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
  5440. "daddiu %[src_y], %[src_y], 8 \n\t"
  5441. "daddiu %[src_u], %[src_u], 4 \n\t"
  5442. "daddiu %[src_v], %[src_v], 4 \n\t"
  5443. "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
  5444. "daddiu %[width], %[width], -8 \n\t"
  5445. "bgtz %[width], 1b \n\t"
  5446. "nop \n\t"
  5447. : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]),
  5448. [vu] "=&f"(vu)
  5449. : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
  5450. [dst_frame] "r"(dst_frame), [width] "r"(width)
  5451. : "memory");
  5452. }
  5453. void I422ToUYVYRow_MMI(const uint8_t* src_y,
  5454. const uint8_t* src_u,
  5455. const uint8_t* src_v,
  5456. uint8_t* dst_frame,
  5457. int width) {
  5458. uint64_t temp[3];
  5459. uint64_t vu = 0x0;
  5460. __asm__ volatile(
  5461. "1: \n\t"
  5462. "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i]
  5463. "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i]
  5464. "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i]
  5465. "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i]
  5466. "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i]
  5467. "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i]
  5468. "punpcklbh %[vu], %[tu], %[tv] \n\t" // g
  5469. "punpcklbh %[tu], %[vu], %[ty] \n\t" // g
  5470. "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t"
  5471. "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t"
  5472. "punpckhbh %[tu], %[vu], %[ty] \n\t" // g
  5473. "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
  5474. "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
  5475. "daddiu %[src_y], %[src_y], 8 \n\t"
  5476. "daddiu %[src_u], %[src_u], 4 \n\t"
  5477. "daddiu %[src_v], %[src_v], 4 \n\t"
  5478. "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
  5479. "daddiu %[width], %[width], -8 \n\t"
  5480. "bgtz %[width], 1b \n\t"
  5481. "nop \n\t"
  5482. : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]),
  5483. [vu] "=&f"(vu)
  5484. : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
  5485. [dst_frame] "r"(dst_frame), [width] "r"(width)
  5486. : "memory");
  5487. }
  5488. void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
  5489. uint64_t source, dest;
  5490. const uint64_t mask0 = 0xff000000ff000000ULL;
  5491. const uint64_t mask1 = ~mask0;
  5492. __asm__ volatile(
  5493. "1: \n\t"
  5494. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  5495. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  5496. "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5497. "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5498. "and %[src], %[src], %[mask0] \n\t"
  5499. "and %[dest], %[dest], %[mask1] \n\t"
  5500. "or %[dest], %[src], %[dest] \n\t"
  5501. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5502. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5503. "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
  5504. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  5505. "daddi %[width], %[width], -0x02 \n\t"
  5506. "bnez %[width], 1b \n\t"
  5507. : [src] "=&f"(source), [dest] "=&f"(dest)
  5508. : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
  5509. [mask1] "f"(mask1), [width] "r"(width)
  5510. : "memory");
  5511. }
  5512. void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb,
  5513. uint8_t* dst_a,
  5514. int width) {
  5515. uint64_t src, dest0, dest1, dest_lo, dest_hi, dest;
  5516. const uint64_t mask = 0xff000000ff000000ULL;
  5517. const uint64_t shift = 0x18;
  5518. __asm__ volatile(
  5519. "1: \n\t"
  5520. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  5521. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  5522. "and %[dest0], %[src], %[mask] \n\t"
  5523. "psrlw %[dest0], %[dest0], %[shift] \n\t"
  5524. "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
  5525. "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
  5526. "and %[dest1], %[src], %[mask] \n\t"
  5527. "psrlw %[dest1], %[dest1], %[shift] \n\t"
  5528. "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
  5529. "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
  5530. "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
  5531. "and %[dest0], %[src], %[mask] \n\t"
  5532. "psrlw %[dest0], %[dest0], %[shift] \n\t"
  5533. "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
  5534. "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
  5535. "and %[dest1], %[src], %[mask] \n\t"
  5536. "psrlw %[dest1], %[dest1], %[shift] \n\t"
  5537. "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
  5538. "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
  5539. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5540. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5541. "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
  5542. "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
  5543. "daddi %[width], %[width], -0x08 \n\t"
  5544. "bnez %[width], 1b \n\t"
  5545. : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
  5546. [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo), [dest_hi] "=&f"(dest_hi)
  5547. : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_a), [mask] "f"(mask),
  5548. [shift] "f"(shift), [width] "r"(width)
  5549. : "memory");
  5550. }
  5551. void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
  5552. uint64_t source, dest0, dest1, dest;
  5553. const uint64_t mask0 = 0x0;
  5554. const uint64_t mask1 = 0x00ffffff00ffffffULL;
  5555. __asm__ volatile(
  5556. "1: \n\t"
  5557. "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
  5558. "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
  5559. "punpcklbh %[dest0], %[mask0], %[src] \n\t"
  5560. "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"
  5561. "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5562. "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5563. "and %[dest], %[dest], %[mask1] \n\t"
  5564. "or %[dest], %[dest], %[dest1] \n\t"
  5565. "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
  5566. "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
  5567. "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"
  5568. "gsldlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
  5569. "gsldrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
  5570. "and %[dest], %[dest], %[mask1] \n\t"
  5571. "or %[dest], %[dest], %[dest1] \n\t"
  5572. "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
  5573. "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
  5574. "punpckhbh %[dest0], %[mask0], %[src] \n\t"
  5575. "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"
  5576. "gsldlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
  5577. "gsldrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
  5578. "and %[dest], %[dest], %[mask1] \n\t"
  5579. "or %[dest], %[dest], %[dest1] \n\t"
  5580. "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
  5581. "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
  5582. "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"
  5583. "gsldlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
  5584. "gsldrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
  5585. "and %[dest], %[dest], %[mask1] \n\t"
  5586. "or %[dest], %[dest], %[dest1] \n\t"
  5587. "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
  5588. "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
  5589. "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
  5590. "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
  5591. "daddi %[width], %[width], -0x08 \n\t"
  5592. "bnez %[width], 1b \n\t"
  5593. : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
  5594. [dest1] "=&f"(dest1)
  5595. : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
  5596. [mask1] "f"(mask1), [width] "r"(width)
  5597. : "memory");
  5598. }
  // Converts one row of planar YUV with full-resolution chroma to 4-byte
  // pixels. Each loop iteration reads 4 Y, 4 U and 4 V bytes (all three source
  // pointers advance by 4) and stores 16 bytes; per the lane comments below the
  // stored byte order is B, G, R, 0xff.
  //
  // Coefficients are read from fixed byte offsets of *yuvconstants: 0x00 ub,
  // 0x20 ug and vg, 0x40 vr, 0x60 bb, 0x80 bg, 0xa0 br, 0xc0 yg.
  // NOTE(review): these offsets presumably mirror the layout of struct
  // YuvConstants in libyuv/row.h - confirm whenever that struct changes.
  //
  // The loop ends only when width reaches exactly 0 (bnez after subtracting 4),
  // so width must be a positive multiple of 4.
  //
  // NOTE(review): the assembly advances y_ptr, u_ptr, v_ptr, rgbbuf_ptr and
  // width although they are declared as input-only operands, which GCC's
  // extended-asm rules do not allow. Declaring them "+r" is not a drop-in fix:
  // this statement already has 28 operands and each "+" operand counts twice
  // toward GCC's limit of 30.
  5599. void I444ToARGBRow_MMI(const uint8_t* src_y,
  5600. const uint8_t* src_u,
  5601. const uint8_t* src_v,
  5602. uint8_t* rgb_buf,
  5603. const struct YuvConstants* yuvconstants,
  5604. int width) {
  5605. uint64_t y,u,v;
  5606. uint64_t b_vec[2],g_vec[2],r_vec[2];
  5607. uint64_t mask = 0xff00ff00ff00ff00ULL;
  5608. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  5609. __asm__ volatile (
  // One-time setup. ug/vg/vr are widened from bytes to 16-bit lanes and then
  // broadcast with pshufh: selector 0x00 replicates lane 0, 0x55 lane 1.
  5610. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg
  5611. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb
  5612. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub
  5613. "or %[ub], %[ub], %[mask] \n\t"//must sign extension
  5614. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg
  5615. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug
  5616. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  5617. "pshufh %[ug], %[ug], %[zero] \n\t"
  5618. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg
  5619. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  5620. "pshufh %[vg], %[vg], %[five] \n\t"
  5621. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br
  5622. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr
  5623. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  5624. "pshufh %[vr], %[vr], %[five] \n\t"
  5625. "or %[vr], %[vr], %[mask] \n\t"//sign extension
  5626. "1: \n\t"
  // Load 4 Y, 4 U and 4 V bytes with unaligned left/right load pairs.
  5627. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  5628. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  5629. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  5630. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  5631. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  5632. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  5633. "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
  5634. "pmulhuh %[y], %[y], %[yg] \n\t"//y1
  5635. "punpcklbh %[u], %[u], %[zero] \n\t"//u
  // b = (y1 + bb - u * ub) >> 6
  5636. "paddsh %[b_vec0], %[y], %[bb] \n\t"
  5637. "pmullh %[b_vec1], %[u], %[ub] \n\t"
  5638. "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
  5639. "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
  5640. "punpcklbh %[v], %[v], %[zero] \n\t"//v
  // g = (y1 + bg - u * ug - v * vg) >> 6
  5641. "paddsh %[g_vec0], %[y], %[bg] \n\t"
  5642. "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug
  5643. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  5644. "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg
  5645. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  5646. "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
  // r = (y1 + br - v * vr) >> 6
  5647. "paddsh %[r_vec0], %[y], %[br] \n\t"
  5648. "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr
  5649. "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
  5650. "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
  // Saturate to bytes and interleave the channels into four 4-byte pixels.
  5651. "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
  5652. "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg
  5653. "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
  5654. "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb
  5655. "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr
  5656. "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
  5657. "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
  5658. "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
  5659. "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
  5660. "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
  5661. "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
  5662. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  5663. "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t"
  5664. "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t"
  5665. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  5666. "daddi %[width], %[width], -0x04 \n\t"
  5667. "bnez %[width], 1b \n\t"
  5668. : [y]"=&f"(y),
  5669. [u]"=&f"(u), [v]"=&f"(v),
  5670. [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
  5671. [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
  5672. [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
  5673. [ub]"=&f"(ub), [ug]"=&f"(ug),
  5674. [vg]"=&f"(vg), [vr]"=&f"(vr),
  5675. [bb]"=&f"(bb), [bg]"=&f"(bg),
  5676. [br]"=&f"(br), [yg]"=&f"(yg)
  5677. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  5678. [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
  5679. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  5680. [zero]"f"(0x00), [alpha]"f"(-1),
  5681. [six]"f"(0x6), [five]"f"(0x55),
  5682. [mask]"f"(mask)
  5683. : "memory"
  5684. );
  5685. }
  5686. // Also used for 420
  // Converts one row of planar YUV with horizontally subsampled chroma to
  // 4-byte pixels. Each loop iteration uses 4 Y bytes and 2 U / 2 V bytes (the
  // chroma pointers advance by 2; each chroma byte is duplicated for two
  // pixels) and stores 16 bytes; per the lane comments below the stored byte
  // order is B, G, R, 0xff. Note that 4 bytes are loaded from each chroma
  // pointer even though only the low 2 are used.
  //
  // Coefficients are read from fixed byte offsets of *yuvconstants: 0x00 ub,
  // 0x20 ug and vg, 0x40 vr, 0x60 bb, 0x80 bg, 0xa0 br, 0xc0 yg.
  // NOTE(review): these offsets presumably mirror the layout of struct
  // YuvConstants in libyuv/row.h - confirm whenever that struct changes.
  //
  // The loop ends only when width reaches exactly 0 (bnez after subtracting 4),
  // so width must be a positive multiple of 4.
  //
  // NOTE(review): the assembly advances y_ptr, u_ptr, v_ptr, rgbbuf_ptr and
  // width although they are declared as input-only operands, which GCC's
  // extended-asm rules do not allow. Declaring them "+r" is not a drop-in fix:
  // this statement already has 28 operands and each "+" operand counts twice
  // toward GCC's limit of 30.
  5687. void I422ToARGBRow_MMI(const uint8_t* src_y,
  5688. const uint8_t* src_u,
  5689. const uint8_t* src_v,
  5690. uint8_t* rgb_buf,
  5691. const struct YuvConstants* yuvconstants,
  5692. int width) {
  5693. uint64_t y,u,v;
  5694. uint64_t b_vec[2],g_vec[2],r_vec[2];
  5695. uint64_t mask = 0xff00ff00ff00ff00ULL;
  5696. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  5697. __asm__ volatile(
  // One-time setup. ug/vg/vr are widened from bytes to 16-bit lanes and then
  // broadcast with pshufh: selector 0x00 replicates lane 0, 0x55 lane 1.
  5698. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg
  5699. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb
  5700. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub
  5701. "or %[ub], %[ub], %[mask] \n\t"//must sign extension
  5702. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg
  5703. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug
  5704. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  5705. "pshufh %[ug], %[ug], %[zero] \n\t"
  5706. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg
  5707. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  5708. "pshufh %[vg], %[vg], %[five] \n\t"
  5709. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br
  5710. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr
  5711. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  5712. "pshufh %[vr], %[vr], %[five] \n\t"
  5713. "or %[vr], %[vr], %[mask] \n\t"//sign extension
  5714. "1: \n\t"
  // Load 4 bytes from each plane with unaligned left/right load pairs.
  5715. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  5716. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  5717. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  5718. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  5719. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  5720. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  5721. "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
  5722. "pmulhuh %[y], %[y], %[yg] \n\t"//y1
  5723. //u3|u2|u1|u0 --> u1|u1|u0|u0
  5724. "punpcklbh %[u], %[u], %[u] \n\t"//u
  5725. "punpcklbh %[u], %[u], %[zero] \n\t"
  // b = (y1 + bb - u * ub) >> 6
  5726. "paddsh %[b_vec0], %[y], %[bb] \n\t"
  5727. "pmullh %[b_vec1], %[u], %[ub] \n\t"
  5728. "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
  5729. "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
  5730. //v3|v2|v1|v0 --> v1|v1|v0|v0
  5731. "punpcklbh %[v], %[v], %[v] \n\t"//v
  5732. "punpcklbh %[v], %[v], %[zero] \n\t"
  // g = (y1 + bg - u * ug - v * vg) >> 6
  5733. "paddsh %[g_vec0], %[y], %[bg] \n\t"
  5734. "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug
  5735. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  5736. "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg
  5737. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  5738. "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
  // r = (y1 + br - v * vr) >> 6
  5739. "paddsh %[r_vec0], %[y], %[br] \n\t"
  5740. "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr
  5741. "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
  5742. "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
  // Saturate to bytes and interleave the channels into four 4-byte pixels.
  5743. "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
  5744. "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg
  5745. "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
  5746. "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb
  5747. "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr
  5748. "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
  5749. "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
  5750. "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
  5751. "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
  5752. "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
  5753. "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
  5754. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  5755. "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
  5756. "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
  5757. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  5758. "daddi %[width], %[width], -0x04 \n\t"
  5759. "bnez %[width], 1b \n\t"
  5760. : [y]"=&f"(y),
  5761. [u]"=&f"(u), [v]"=&f"(v),
  5762. [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
  5763. [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
  5764. [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
  5765. [ub]"=&f"(ub), [ug]"=&f"(ug),
  5766. [vg]"=&f"(vg), [vr]"=&f"(vr),
  5767. [bb]"=&f"(bb), [bg]"=&f"(bg),
  5768. [br]"=&f"(br), [yg]"=&f"(yg)
  5769. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  5770. [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
  5771. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  5772. [zero]"f"(0x00), [alpha]"f"(-1),
  5773. [six]"f"(0x6), [five]"f"(0x55),
  5774. [mask]"f"(mask)
  5775. : "memory"
  5776. );
  5777. }
  5778. // 10 bit YUV to ARGB
  // Converts one row of planar YUV stored as 16-bit samples to 4-byte pixels.
  // Each loop iteration reads four 16-bit Y samples (8 bytes) and two 16-bit
  // U / two 16-bit V samples (4 bytes each; every chroma sample is duplicated
  // for two pixels) and stores 16 bytes.
  //
  // Y is shifted left by 6 before the multiply by yg. U and V are shifted right
  // by 2 and clamped to 0x00ff (pminsh with mask1) before they enter the same
  // arithmetic as the 8-bit paths:
  //   b = (y1 + bb - u * ub) >> 6
  //   g = (y1 + bg - u * ug - v * vg) >> 6
  //   r = (y1 + br - v * vr) >> 6
  //
  // Coefficients are read from fixed byte offsets of *yuvconstants: 0x00 ub,
  // 0x20 ug and vg, 0x40 vr, 0x60 bb, 0x80 bg, 0xa0 br, 0xc0 yg.
  // NOTE(review): these offsets presumably mirror the layout of struct
  // YuvConstants in libyuv/row.h - confirm whenever that struct changes.
  //
  // The loop ends only when width reaches exactly 0 (bnez after subtracting 4),
  // so width must be a positive multiple of 4.
  //
  // NOTE(review): the assembly advances y_ptr, u_ptr, v_ptr, rgbbuf_ptr and
  // width although they are declared as input-only operands, which GCC's
  // extended-asm rules do not allow. Declaring them "+r" is not a drop-in fix:
  // this statement already has 30 operands, GCC's maximum, and each "+" operand
  // counts twice.
  5779. void I210ToARGBRow_MMI(const uint16_t* src_y,
  5780. const uint16_t* src_u,
  5781. const uint16_t* src_v,
  5782. uint8_t* rgb_buf,
  5783. const struct YuvConstants* yuvconstants,
  5784. int width) {
  5785. uint64_t y,u,v;
  5786. uint64_t b_vec[2],g_vec[2],r_vec[2];
  5787. uint64_t mask = 0xff00ff00ff00ff00ULL;
  5788. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  5789. __asm__ volatile(
  // One-time setup. ug/vg/vr are widened from bytes to 16-bit lanes and then
  // broadcast with pshufh: selector 0x00 replicates lane 0, 0x55 lane 1.
  5790. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  5791. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  5792. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  5793. "or %[ub], %[ub], %[mask] \n\t"
  5794. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  5795. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  5796. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  5797. "pshufh %[ug], %[ug], %[zero] \n\t"
  5798. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  5799. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  5800. "pshufh %[vg], %[vg], %[five] \n\t"
  5801. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  5802. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  5803. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  5804. "pshufh %[vr], %[vr], %[five] \n\t"
  5805. "or %[vr], %[vr], %[mask] \n\t"
  5806. "1: \n\t"
  // Load 8 bytes of Y and 4 bytes each of U and V (unaligned load pairs).
  5807. "gsldlc1 %[y], 0x07(%[y_ptr]) \n\t"
  5808. "gsldrc1 %[y], 0x00(%[y_ptr]) \n\t"
  5809. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  5810. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  5811. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  5812. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  5813. "psllh %[y], %[y], %[six] \n\t"
  5814. "pmulhuh %[y], %[y], %[yg] \n\t"
  // Duplicate each chroma sample for two pixels, drop 2 low bits, clamp to 255.
  5815. "punpcklhw %[u], %[u], %[u] \n\t"
  5816. "psrah %[u], %[u], %[two] \n\t"
  5817. "punpcklhw %[v], %[v], %[v] \n\t"
  5818. "psrah %[v], %[v], %[two] \n\t"
  5819. "pminsh %[u], %[u], %[mask1] \n\t"
  5820. "pminsh %[v], %[v], %[mask1] \n\t"
  5821. "paddsh %[b_vec0], %[y], %[bb] \n\t"
  5822. "pmullh %[b_vec1], %[u], %[ub] \n\t"
  5823. "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
  5824. "paddsh %[g_vec0], %[y], %[bg] \n\t"
  5825. "pmullh %[g_vec1], %[u], %[ug] \n\t"
  5826. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  5827. "pmullh %[g_vec1], %[v], %[vg] \n\t"
  5828. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  5829. "paddsh %[r_vec0], %[y], %[br] \n\t"
  5830. "pmullh %[r_vec1], %[v], %[vr] \n\t"
  5831. "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
  5832. "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
  5833. "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
  5834. "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
  // Saturate to bytes and interleave the channels into four 4-byte pixels
  // (same packing sequence as the 8-bit ARGB row functions in this file).
  5835. "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"
  5836. "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"
  5837. "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
  5838. "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
  5839. "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
  5840. "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
  5841. "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
  5842. "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
  5843. "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
  5844. "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
  5845. "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
  5846. "daddiu %[y_ptr], %[y_ptr], 0x08 \n\t"
  5847. "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t"
  5848. "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t"
  5849. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  5850. "daddi %[width], %[width], -0x04 \n\t"
  5851. "bnez %[width], 1b \n\t"
  5852. : [y]"=&f"(y),
  5853. [u]"=&f"(u), [v]"=&f"(v),
  5854. [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
  5855. [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
  5856. [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
  5857. [ub]"=&f"(ub), [ug]"=&f"(ug),
  5858. [vg]"=&f"(vg), [vr]"=&f"(vr),
  5859. [bb]"=&f"(bb), [bg]"=&f"(bg),
  5860. [br]"=&f"(br), [yg]"=&f"(yg)
  5861. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  5862. [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
  5863. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  5864. [zero]"f"(0x00), [alpha]"f"(-1),
  5865. [six]"f"(0x6), [five]"f"(0x55),
  5866. [mask]"f"(mask), [two]"f"(0x02),
  5867. [mask1]"f"(0x00ff00ff00ff00ff)
  5868. : "memory"
  5869. );
  5870. }
  // Converts one row of planar YUV with horizontally subsampled chroma plus a
  // separate alpha plane to 4-byte pixels. Each loop iteration uses 4 Y bytes,
  // 4 alpha bytes and 2 U / 2 V bytes (the chroma pointers advance by 2; each
  // chroma byte is duplicated for two pixels) and stores 16 bytes. The byte
  // taken from src_a fills the lane that the non-alpha ARGB row functions in
  // this file fill with 0xff.
  //
  // Coefficients are read from fixed byte offsets of *yuvconstants: 0x00 ub,
  // 0x20 ug and vg, 0x40 vr, 0x60 bb, 0x80 bg, 0xa0 br, 0xc0 yg.
  // NOTE(review): these offsets presumably mirror the layout of struct
  // YuvConstants in libyuv/row.h - confirm whenever that struct changes.
  //
  // The loop ends only when width reaches exactly 0 (bnez after subtracting 4),
  // so width must be a positive multiple of 4.
  //
  // NOTE(review): the assembly advances y_ptr, u_ptr, v_ptr, a_ptr, rgbbuf_ptr
  // and width although they are declared as input-only operands, which GCC's
  // extended-asm rules do not allow. Declaring them "+r" is not a drop-in fix:
  // this statement already has 29 operands and each "+" operand counts twice
  // toward GCC's limit of 30.
  5871. void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
  5872. const uint8_t* src_u,
  5873. const uint8_t* src_v,
  5874. const uint8_t* src_a,
  5875. uint8_t* rgb_buf,
  5876. const struct YuvConstants* yuvconstants,
  5877. int width) {
  5878. uint64_t y,u,v,a;
  5879. uint64_t b_vec[2],g_vec[2],r_vec[2];
  5880. uint64_t mask = 0xff00ff00ff00ff00ULL;
  5881. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  5882. __asm__ volatile(
  // One-time setup. ug/vg/vr are widened from bytes to 16-bit lanes and then
  // broadcast with pshufh: selector 0x00 replicates lane 0, 0x55 lane 1.
  5883. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  5884. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  5885. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  5886. "or %[ub], %[ub], %[mask] \n\t"
  5887. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  5888. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  5889. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  5890. "pshufh %[ug], %[ug], %[zero] \n\t"
  5891. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  5892. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  5893. "pshufh %[vg], %[vg], %[five] \n\t"
  5894. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  5895. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  5896. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  5897. "pshufh %[vr], %[vr], %[five] \n\t"
  5898. "or %[vr], %[vr], %[mask] \n\t"
  5899. "1: \n\t"
  // Load 4 bytes from each plane with unaligned left/right load pairs.
  5900. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  5901. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  5902. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  5903. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  5904. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  5905. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  5906. "gslwlc1 %[a], 0x03(%[a_ptr]) \n\t"
  5907. "gslwrc1 %[a], 0x00(%[a_ptr]) \n\t"
  5908. "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
  5909. "pmulhuh %[y], %[y], %[yg] \n\t"//y1
  5910. //u3|u2|u1|u0 --> u1|u1|u0|u0
  5911. "punpcklbh %[u], %[u], %[u] \n\t"//u
  5912. "punpcklbh %[u], %[u], %[zero] \n\t"
  // b = (y1 + bb - u * ub) >> 6
  5913. "paddsh %[b_vec0], %[y], %[bb] \n\t"
  5914. "pmullh %[b_vec1], %[u], %[ub] \n\t"
  5915. "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
  5916. "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
  5917. //v3|v2|v1|v0 --> v1|v1|v0|v0
  5918. "punpcklbh %[v], %[v], %[v] \n\t"
  5919. "punpcklbh %[v], %[v], %[zero] \n\t"
  // g = (y1 + bg - u * ug - v * vg) >> 6
  5920. "paddsh %[g_vec0], %[y], %[bg] \n\t"
  5921. "pmullh %[g_vec1], %[u], %[ug] \n\t"
  5922. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  5923. "pmullh %[g_vec1], %[v], %[vg] \n\t"
  5924. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  5925. "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
  // r = (y1 + br - v * vr) >> 6
  5926. "paddsh %[r_vec0], %[y], %[br] \n\t"
  5927. "pmullh %[r_vec1], %[v], %[vr] \n\t"
  5928. "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
  5929. "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
  // Saturate to bytes; the low word of a (the 4 loaded alpha bytes) is paired
  // with the 4 green bytes before the channels are interleaved into pixels.
  5930. "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
  5931. "packushb %[g_vec0], %[g_vec0], %[a] \n\t"
  5932. "punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t"//aaaagggg
  5933. "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
  5934. "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
  5935. "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
  5936. "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
  5937. "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
  5938. "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
  5939. "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
  5940. "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
  5941. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  5942. "daddiu %[a_ptr], %[a_ptr], 0x04 \n\t"
  5943. "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
  5944. "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
  5945. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  5946. "daddi %[width], %[width], -0x04 \n\t"
  5947. "bnez %[width], 1b \n\t"
  5948. : [y]"=&f"(y), [u]"=&f"(u),
  5949. [v]"=&f"(v), [a]"=&f"(a),
  5950. [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
  5951. [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
  5952. [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
  5953. [ub]"=&f"(ub), [ug]"=&f"(ug),
  5954. [vg]"=&f"(vg), [vr]"=&f"(vr),
  5955. [bb]"=&f"(bb), [bg]"=&f"(bg),
  5956. [br]"=&f"(br), [yg]"=&f"(yg)
  5957. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  5958. [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
  5959. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  5960. [a_ptr]"r"(src_a), [zero]"f"(0x00),
  5961. [six]"f"(0x6), [five]"f"(0x55),
  5962. [mask]"f"(mask)
  5963. : "memory"
  5964. );
  5965. }
  // Converts one row of planar YUV with horizontally subsampled chroma to
  // packed 3-byte pixels in B, G, R byte order. Each loop iteration uses 4 Y
  // bytes and 2 U / 2 V bytes (the chroma pointers advance by 2; each chroma
  // byte is duplicated for two pixels) and stores 12 bytes (one 8-byte and one
  // 4-byte store; rgbbuf_ptr advances by 0x0c).
  //
  // Coefficients are read from fixed byte offsets of *yuvconstants: 0x00 ub,
  // 0x20 ug and vg, 0x40 vr, 0x60 bb, 0x80 bg, 0xa0 br, 0xc0 yg.
  // NOTE(review): these offsets presumably mirror the layout of struct
  // YuvConstants in libyuv/row.h - confirm whenever that struct changes.
  //
  // The loop ends only when width reaches exactly 0 (bnez after subtracting 4),
  // so width must be a positive multiple of 4.
  //
  // NOTE(review): the assembly advances y_ptr, u_ptr, v_ptr, rgbbuf_ptr and
  // width although they are declared as input-only operands, which GCC's
  // extended-asm rules do not allow. Declaring them "+r" is not a drop-in fix:
  // this statement already has 30 operands, GCC's maximum, and each "+" operand
  // counts twice.
  5966. void I422ToRGB24Row_MMI(const uint8_t* src_y,
  5967. const uint8_t* src_u,
  5968. const uint8_t* src_v,
  5969. uint8_t* rgb_buf,
  5970. const struct YuvConstants* yuvconstants,
  5971. int width) {
  5972. uint64_t y,u,v;
  5973. uint64_t b_vec[2],g_vec[2],r_vec[2];
  5974. uint64_t mask = 0xff00ff00ff00ff00ULL;
  5975. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  5976. __asm__ volatile(
  // One-time setup. ug/vg/vr are widened from bytes to 16-bit lanes and then
  // broadcast with pshufh: selector 0x00 replicates lane 0, 0x55 lane 1.
  5977. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  5978. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  5979. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  5980. "or %[ub], %[ub], %[mask] \n\t"
  5981. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  5982. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  5983. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  5984. "pshufh %[ug], %[ug], %[zero] \n\t"
  5985. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  5986. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  5987. "pshufh %[vg], %[vg], %[five] \n\t"
  5988. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  5989. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  5990. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  5991. "pshufh %[vr], %[vr], %[five] \n\t"
  5992. "or %[vr], %[vr], %[mask] \n\t"
  5993. "1: \n\t"
  // Load 4 bytes from each plane with unaligned left/right load pairs.
  5994. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  5995. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  5996. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  5997. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  5998. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  5999. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  6000. "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
  6001. "pmulhuh %[y], %[y], %[yg] \n\t"//y1
  6002. //u3|u2|u1|u0 --> u1|u1|u0|u0
  6003. "punpcklbh %[u], %[u], %[u] \n\t"//u
  6004. "punpcklbh %[u], %[u], %[zero] \n\t"
  // b = (y1 + bb - u * ub) >> 6
  6005. "paddsh %[b_vec0], %[y], %[bb] \n\t"
  6006. "pmullh %[b_vec1], %[u], %[ub] \n\t"
  6007. "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
  6008. "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
  6009. //v3|v2|v1|v0 --> v1|v1|v0|v0
  6010. "punpcklbh %[v], %[v], %[v] \n\t"
  6011. "punpcklbh %[v], %[v], %[zero] \n\t"
  // g = (y1 + bg - u * ug - v * vg) >> 6
  6012. "paddsh %[g_vec0], %[y], %[bg] \n\t"
  6013. "pmullh %[g_vec1], %[u], %[ug] \n\t"
  6014. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  6015. "pmullh %[g_vec1], %[v], %[vg] \n\t"
  6016. "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
  6017. "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
  // r = (y1 + br - v * vr) >> 6
  6018. "paddsh %[r_vec0], %[y], %[br] \n\t"
  6019. "pmullh %[r_vec1], %[v], %[vr] \n\t"
  6020. "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
  6021. "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
  // Saturate to bytes and interleave into four 4-byte groups B G R 0
  // (g_vec0 = pixels 0 and 1, g_vec1 = pixels 2 and 3); the fourth byte is 0
  // because green is packed together with zero.
  6022. "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"
  6023. "packushb %[g_vec0], %[g_vec0], %[zero] \n\t"
  6024. "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
  6025. "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
  6026. "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
  6027. "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
  // Squeeze out the four padding bytes. After this sequence
  //   g_vec0          = B0 G0 R0 B1 G1 R1 B2 G2
  //   g_vec1 low word = R2 B3 G3 R3
  // The steps depend on each other's intermediate register contents; keep the
  // order as is.
  6028. "punpckhwd %[r_vec0], %[g_vec0], %[g_vec0] \n\t"
  6029. "psllw %[r_vec1], %[r_vec0], %[lmove1] \n\t"
  6030. "or %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
  6031. "psrlw %[r_vec1], %[r_vec0], %[rmove1] \n\t"
  6032. "pextrh %[r_vec1], %[r_vec1], %[zero] \n\t"
  6033. "pinsrh_2 %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
  6034. "pextrh %[r_vec1], %[g_vec1], %[zero] \n\t"
  6035. "pinsrh_3 %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
  6036. "pextrh %[r_vec1], %[g_vec1], %[one] \n\t"
  6037. "punpckhwd %[g_vec1], %[g_vec1], %[g_vec1] \n\t"
  6038. "psllw %[g_vec1], %[g_vec1], %[rmove1] \n\t"
  6039. "or %[g_vec1], %[g_vec1], %[r_vec1] \n\t"
  // Store 8 + 4 = 12 bytes.
  6040. "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
  6041. "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
  6042. "gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t"
  6043. "gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
  6044. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6045. "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
  6046. "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
  6047. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0c \n\t"
  6048. "daddi %[width], %[width], -0x04 \n\t"
  6049. "bnez %[width], 1b \n\t"
  6050. : [y]"=&f"(y), [u]"=&f"(u),
  6051. [v]"=&f"(v),
  6052. [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
  6053. [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
  6054. [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
  6055. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6056. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6057. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6058. [br]"=&f"(br), [yg]"=&f"(yg)
  6059. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  6060. [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
  6061. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6062. [zero]"f"(0x00), [five]"f"(0x55),
  6063. [six]"f"(0x6), [mask]"f"(mask),
  6064. [lmove1]"f"(0x18), [rmove1]"f"(0x8),
  6065. [one]"f"(0x1)
  6066. : "memory"
  6067. );
  6068. }
  // I422ToARGB4444Row_MMI: Loongson MMI row kernel that, per its name,
  // converts planar I422 (src_y / src_u / src_v) to 16-bit ARGB4444.
  //
  // Per loop iteration the asm reads 4 bytes from src_y, advances src_u and
  // src_v by 2 bytes each, stores 8 bytes to dst_argb4444 and subtracts 4
  // from width. The loop only terminates when width reaches exactly zero
  // (bnez), so width has to be a positive multiple of 4.
  // Conversion coefficients are read from fixed byte offsets of yuvconstants.
  //
  // NOTE(review): the pointer and width operands are listed as asm inputs
  // ("r") but are written by daddiu/daddi inside the template. GCC's extended
  // asm rules do not allow modifying input-only operands; "+r" operands look
  // like the correct declaration -- confirm before changing.
  // NOTE(review): U and V are fetched with a word load spanning offsets
  // 0x00..0x03 although the pointers advance by only 2 bytes, so the last
  // iteration touches 2 bytes beyond the consumed chroma -- confirm the
  // callers' buffers tolerate that.
  6069. void I422ToARGB4444Row_MMI(const uint8_t* src_y,
  6070. const uint8_t* src_u,
  6071. const uint8_t* src_v,
  6072. uint8_t* dst_argb4444,
  6073. const struct YuvConstants* yuvconstants,
  6074. int width) {
  6075. uint64_t y, u, v;
  6076. uint64_t b_vec, g_vec, r_vec, temp;
  6077. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6078. __asm__ volatile(
  // Prologue: load the coefficients from yuvconstants into FP registers.
  // ub and vr are OR-ed with mask (0xff00ff00ff00ff00), which sets the upper
  // byte of every 16-bit lane; ug/vg/vr are widened to halfwords and one lane
  // is broadcast with pshufh (selector 0x00 or 0x55).
  6079. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6080. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6081. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  6082. "or %[ub], %[ub], %[mask] \n\t"
  6083. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  6084. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6085. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6086. "pshufh %[ug], %[ug], %[zero] \n\t"
  6087. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6088. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6089. "pshufh %[vg], %[vg], %[five] \n\t"
  6090. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  6091. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6092. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6093. "pshufh %[vr], %[vr], %[five] \n\t"
  6094. "or %[vr], %[vr], %[mask] \n\t"
  // Main loop: 4 pixels per iteration.
  6095. "1: \n\t"
  // Unaligned word loads of Y, U and V (left/right load pairs).
  6096. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  6097. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  6098. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  6099. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  6100. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  6101. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  6102. "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
  6103. "pmulhuh %[y], %[y], %[yg] \n\t"//y1
  6104. //u3|u2|u1|u0 --> u1|u1|u0|u0
  6105. "punpcklbh %[u], %[u], %[u] \n\t"//u
  6106. "punpcklbh %[u], %[u], %[zero] \n\t"
  // b = (y + bb - u * ub) >> 6, per 16-bit lane.
  6107. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6108. "pmullh %[temp], %[u], %[ub] \n\t"
  6109. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6110. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  6111. //v3|v2|v1|v0 --> v1|v1|v0|v0
  6112. "punpcklbh %[v], %[v], %[v] \n\t"
  6113. "punpcklbh %[v], %[v], %[zero] \n\t"
  // g = (y + bg - u * ug - v * vg) >> 6, per 16-bit lane.
  6114. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6115. "pmullh %[temp], %[u], %[ug] \n\t"
  6116. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6117. "pmullh %[temp], %[v], %[vg] \n\t"
  6118. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6119. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r = (y + br - v * vr) >> 6, per 16-bit lane.
  6120. "paddsh %[r_vec], %[y], %[br] \n\t"
  6121. "pmullh %[temp], %[v], %[vr] \n\t"
  6122. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6123. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Narrow to bytes, append the alpha operand (-1) next to G, and interleave
  // into 32-bit B,G,R,A pixels: g_vec holds pixels 0-1, b_vec pixels 2-3.
  6124. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6125. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  6126. "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
  6127. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6128. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6129. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6130. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Pixels 0-1: keep the top nibble of every channel (mask1), shift it down,
  // OR with a copy shifted by another 4 bits so neighbouring nibbles share a
  // byte, then keep only the low byte of each halfword (0x00ff lanes).
  6131. "and %[g_vec], %[g_vec], %[mask1] \n\t"
  6132. "psrlw %[g_vec], %[g_vec], %[four] \n\t"
  6133. "psrlw %[r_vec], %[g_vec], %[four] \n\t"
  6134. "or %[g_vec], %[g_vec], %[r_vec] \n\t"
  6135. "punpcklbh %[r_vec], %[alpha], %[zero] \n\t"
  6136. "and %[g_vec], %[g_vec], %[r_vec] \n\t"
  // Pixels 2-3: same nibble folding on b_vec.
  6137. "and %[b_vec], %[b_vec], %[mask1] \n\t"
  6138. "psrlw %[b_vec], %[b_vec], %[four] \n\t"
  6139. "psrlw %[r_vec], %[b_vec], %[four] \n\t"
  6140. "or %[b_vec], %[b_vec], %[r_vec] \n\t"
  6141. "punpcklbh %[r_vec], %[alpha], %[zero] \n\t"
  6142. "and %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Pack the eight halfwords into 8 bytes (4 pixels) and store them.
  6143. "packushb %[g_vec], %[g_vec], %[b_vec] \n\t"
  6144. "gssdlc1 %[g_vec], 0x07(%[dst_argb4444]) \n\t"
  6145. "gssdrc1 %[g_vec], 0x00(%[dst_argb4444]) \n\t"
  // Advance the source/destination pointers and count down 4 pixels.
  6146. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6147. "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
  6148. "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
  6149. "daddiu %[dst_argb4444], %[dst_argb4444], 0x08 \n\t"
  6150. "daddi %[width], %[width], -0x04 \n\t"
  6151. "bnez %[width], 1b \n\t"
  // Outputs: early-clobber FP scratch registers only.
  6152. : [y]"=&f"(y), [u]"=&f"(u),
  6153. [v]"=&f"(v),
  6154. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6155. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6156. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6157. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6158. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6159. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in GPRs, constants in FP registers.
  6160. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  6161. [v_ptr]"r"(src_v), [dst_argb4444]"r"(dst_argb4444),
  6162. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6163. [zero]"f"(0x00), [five]"f"(0x55),
  6164. [six]"f"(0x6), [mask]"f"(0xff00ff00ff00ff00),
  6165. [four]"f"(0x4), [mask1]"f"(0xf0f0f0f0f0f0f0f0),
  6166. [alpha]"f"(-1)
  6167. : "memory"
  6168. );
  6169. }
  // I422ToARGB1555Row_MMI: Loongson MMI row kernel that, per its name,
  // converts planar I422 (src_y / src_u / src_v) to 16-bit ARGB1555.
  //
  // Per loop iteration the asm reads 4 bytes from src_y, advances src_u and
  // src_v by 2 bytes each, stores 8 bytes to dst_argb1555 and subtracts 4
  // from width. The loop only terminates when width reaches exactly zero
  // (bnez), so width has to be a positive multiple of 4.
  // Conversion coefficients are read from fixed byte offsets of yuvconstants.
  //
  // NOTE(review): the pointer and width operands are listed as asm inputs
  // ("r") but are written by daddiu/daddi inside the template. GCC's extended
  // asm rules do not allow modifying input-only operands; "+r" operands look
  // like the correct declaration -- confirm before changing.
  // NOTE(review): U and V are fetched with a word load spanning offsets
  // 0x00..0x03 although the pointers advance by only 2 bytes, so the last
  // iteration touches 2 bytes beyond the consumed chroma -- confirm the
  // callers' buffers tolerate that.
  6170. void I422ToARGB1555Row_MMI(const uint8_t* src_y,
  6171. const uint8_t* src_u,
  6172. const uint8_t* src_v,
  6173. uint8_t* dst_argb1555,
  6174. const struct YuvConstants* yuvconstants,
  6175. int width) {
  6176. uint64_t y, u, v;
  6177. uint64_t b_vec, g_vec, r_vec, temp;
  6178. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6179. __asm__ volatile(
  // Prologue: load the coefficients from yuvconstants into FP registers.
  // ub and vr are OR-ed with mask1 (0xff00ff00ff00ff00), which sets the upper
  // byte of every 16-bit lane; ug/vg/vr are widened to halfwords and one lane
  // is broadcast with pshufh (selector 0x00 or 0x55).
  6180. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6181. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6182. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  6183. "or %[ub], %[ub], %[mask1] \n\t"
  6184. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  6185. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6186. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6187. "pshufh %[ug], %[ug], %[zero] \n\t"
  6188. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6189. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6190. "pshufh %[vg], %[vg], %[five] \n\t"
  6191. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  6192. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6193. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6194. "pshufh %[vr], %[vr], %[five] \n\t"
  6195. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: 4 pixels per iteration.
  6196. "1: \n\t"
  // Unaligned word loads of Y, U and V (left/right load pairs).
  6197. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  6198. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  6199. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  6200. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  6201. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  6202. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  // Duplicate each Y byte into a halfword, then take the high half of the
  // unsigned product with yg.
  6203. "punpcklbh %[y], %[y], %[y] \n\t"
  6204. "pmulhuh %[y], %[y], %[yg] \n\t"
  6205. //u3|u2|u1|u0 --> u1|u1|u0|u0
  6206. "punpcklbh %[u], %[u], %[u] \n\t"
  6207. "punpcklbh %[u], %[u], %[zero] \n\t"
  // b = (y + bb - u * ub) >> 6, per 16-bit lane.
  6208. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6209. "pmullh %[temp], %[u], %[ub] \n\t"
  6210. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6211. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  6212. //v3|v2|v1|v0 --> v1|v1|v0|v0
  6213. "punpcklbh %[v], %[v], %[v] \n\t"
  6214. "punpcklbh %[v], %[v], %[zero] \n\t"
  // g = (y + bg - u * ug - v * vg) >> 6, per 16-bit lane.
  6215. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6216. "pmullh %[temp], %[u], %[ug] \n\t"
  6217. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6218. "pmullh %[temp], %[v], %[vg] \n\t"
  6219. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6220. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r = (y + br - v * vr) >> 6, per 16-bit lane.
  6221. "paddsh %[r_vec], %[y], %[br] \n\t"
  6222. "pmullh %[temp], %[v], %[vr] \n\t"
  6223. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6224. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Narrow to bytes and interleave into 32-bit B,G,R,0 pixels:
  // g_vec holds pixels 0-1, b_vec pixels 2-3.
  6225. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6226. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  6227. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6228. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6229. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6230. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Pixels 0-1, per 32-bit word:
  //   (b >> 3) | ((g >> 3) << 5) | ((r >> 3) << 10) | 0x8000
  // mask2 keeps 5 bits, lmove5 is applied once for G and twice for R, and
  // mask3 sets bit 15.
  6231. "psrlw %[temp], %[g_vec], %[three] \n\t"
  6232. "and %[g_vec], %[temp], %[mask2] \n\t"
  6233. "psrlw %[temp], %[temp], %[eight] \n\t"
  6234. "and %[r_vec], %[temp], %[mask2] \n\t"
  6235. "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
  6236. "or %[g_vec], %[g_vec], %[r_vec] \n\t"
  6237. "psrlw %[temp], %[temp], %[eight] \n\t"
  6238. "and %[r_vec], %[temp], %[mask2] \n\t"
  6239. "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
  6240. "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
  6241. "or %[g_vec], %[g_vec], %[r_vec] \n\t"
  6242. "or %[g_vec], %[g_vec], %[mask3] \n\t"
  // Pixels 2-3: same packing applied to b_vec.
  6243. "psrlw %[temp], %[b_vec], %[three] \n\t"
  6244. "and %[b_vec], %[temp], %[mask2] \n\t"
  6245. "psrlw %[temp], %[temp], %[eight] \n\t"
  6246. "and %[r_vec], %[temp], %[mask2] \n\t"
  6247. "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
  6248. "or %[b_vec], %[b_vec], %[r_vec] \n\t"
  6249. "psrlw %[temp], %[temp], %[eight] \n\t"
  6250. "and %[r_vec], %[temp], %[mask2] \n\t"
  6251. "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
  6252. "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
  6253. "or %[b_vec], %[b_vec], %[r_vec] \n\t"
  6254. "or %[b_vec], %[b_vec], %[mask3] \n\t"
  // Gather the low halfword of each of the four 32-bit results into one
  // 64-bit value in pixel order, then store 8 bytes.
  6255. "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
  6256. "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
  6257. "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
  6258. "gssdlc1 %[g_vec], 0x07(%[dst_argb1555]) \n\t"
  6259. "gssdrc1 %[g_vec], 0x00(%[dst_argb1555]) \n\t"
  // Advance the source/destination pointers and count down 4 pixels.
  6260. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6261. "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
  6262. "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
  6263. "daddiu %[dst_argb1555], %[dst_argb1555], 0x08 \n\t"
  6264. "daddi %[width], %[width], -0x04 \n\t"
  6265. "bnez %[width], 1b \n\t"
  // Outputs: early-clobber FP scratch registers only.
  6266. : [y]"=&f"(y), [u]"=&f"(u),
  6267. [v]"=&f"(v),
  6268. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6269. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6270. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6271. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6272. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6273. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in GPRs, constants in FP registers.
  6274. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  6275. [v_ptr]"r"(src_v), [dst_argb1555]"r"(dst_argb1555),
  6276. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6277. [zero]"f"(0x00), [five]"f"(0x55),
  6278. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  6279. [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
  6280. [eight]"f"(0x8), [mask3]"f"(0x800000008000),
  6281. [lmove5]"f"(0x5)
  6282. : "memory"
  6283. );
  6284. }
  // I422ToRGB565Row_MMI: Loongson MMI row kernel that, per its name,
  // converts planar I422 (src_y / src_u / src_v) to 16-bit RGB565.
  //
  // Per loop iteration the asm reads 4 bytes from src_y, advances src_u and
  // src_v by 2 bytes each, stores 8 bytes to dst_rgb565 and subtracts 4
  // from width. The loop only terminates when width reaches exactly zero
  // (bnez), so width has to be a positive multiple of 4.
  // Conversion coefficients are read from fixed byte offsets of yuvconstants.
  //
  // NOTE(review): the pointer and width operands are listed as asm inputs
  // ("r") but are written by daddiu/daddi inside the template. GCC's extended
  // asm rules do not allow modifying input-only operands; "+r" operands look
  // like the correct declaration -- confirm before changing.
  // NOTE(review): U and V are fetched with a word load spanning offsets
  // 0x00..0x03 although the pointers advance by only 2 bytes, so the last
  // iteration touches 2 bytes beyond the consumed chroma -- confirm the
  // callers' buffers tolerate that.
  6285. void I422ToRGB565Row_MMI(const uint8_t* src_y,
  6286. const uint8_t* src_u,
  6287. const uint8_t* src_v,
  6288. uint8_t* dst_rgb565,
  6289. const struct YuvConstants* yuvconstants,
  6290. int width) {
  6291. uint64_t y, u, v;
  6292. uint64_t b_vec, g_vec, r_vec, temp;
  6293. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6294. __asm__ volatile(
  // Prologue: load the coefficients from yuvconstants into FP registers.
  // ub and vr are OR-ed with mask1 (0xff00ff00ff00ff00), which sets the upper
  // byte of every 16-bit lane; ug/vg/vr are widened to halfwords and one lane
  // is broadcast with pshufh (selector 0x00 or 0x55).
  6295. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6296. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6297. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  6298. "or %[ub], %[ub], %[mask1] \n\t"
  6299. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  6300. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6301. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6302. "pshufh %[ug], %[ug], %[zero] \n\t"
  6303. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6304. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6305. "pshufh %[vg], %[vg], %[five] \n\t"
  6306. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  6307. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6308. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6309. "pshufh %[vr], %[vr], %[five] \n\t"
  6310. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: 4 pixels per iteration.
  6311. "1: \n\t"
  // Unaligned word loads of Y, U and V (left/right load pairs).
  6312. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  6313. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  6314. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  6315. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  6316. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  6317. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  // Duplicate each Y byte into a halfword, then take the high half of the
  // unsigned product with yg.
  6318. "punpcklbh %[y], %[y], %[y] \n\t"
  6319. "pmulhuh %[y], %[y], %[yg] \n\t"
  6320. //u3|u2|u1|u0 --> u1|u1|u0|u0
  6321. "punpcklbh %[u], %[u], %[u] \n\t"
  6322. "punpcklbh %[u], %[u], %[zero] \n\t"
  // b = (y + bb - u * ub) >> 6, per 16-bit lane.
  6323. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6324. "pmullh %[temp], %[u], %[ub] \n\t"
  6325. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6326. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  6327. //v3|v2|v1|v0 --> v1|v1|v0|v0
  6328. "punpcklbh %[v], %[v], %[v] \n\t"
  6329. "punpcklbh %[v], %[v], %[zero] \n\t"
  // g = (y + bg - u * ug - v * vg) >> 6, per 16-bit lane.
  6330. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6331. "pmullh %[temp], %[u], %[ug] \n\t"
  6332. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6333. "pmullh %[temp], %[v], %[vg] \n\t"
  6334. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6335. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r = (y + br - v * vr) >> 6, per 16-bit lane.
  6336. "paddsh %[r_vec], %[y], %[br] \n\t"
  6337. "pmullh %[temp], %[v], %[vr] \n\t"
  6338. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6339. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Narrow to bytes and interleave into 32-bit B,G,R,0 pixels:
  // g_vec holds pixels 0-1, b_vec pixels 2-3.
  6340. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6341. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  6342. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6343. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6344. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6345. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Pixels 0-1, per 32-bit word:
  //   (b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11)
  // The 8-bit mask for G is derived as mask1 >> 8; the shift counts 9 and 11
  // are synthesised with paddb (three + six, three + eight).
  6346. "psrlh %[temp], %[g_vec], %[three] \n\t"
  6347. "and %[g_vec], %[temp], %[mask2] \n\t"
  6348. "psrlw %[temp], %[temp], %[seven] \n\t"
  6349. "psrlw %[r_vec], %[mask1], %[eight] \n\t"
  6350. "and %[r_vec], %[temp], %[r_vec] \n\t"
  6351. "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
  6352. "or %[g_vec], %[g_vec], %[r_vec] \n\t"
  6353. "paddb %[r_vec], %[three], %[six] \n\t"
  6354. "psrlw %[temp], %[temp], %[r_vec] \n\t"
  6355. "and %[r_vec], %[temp], %[mask2] \n\t"
  6356. "paddb %[temp], %[three], %[eight] \n\t"
  6357. "psllw %[r_vec], %[r_vec], %[temp] \n\t"
  6358. "or %[g_vec], %[g_vec], %[r_vec] \n\t"
  // Pixels 2-3: same packing applied to b_vec.
  6359. "psrlh %[temp], %[b_vec], %[three] \n\t"
  6360. "and %[b_vec], %[temp], %[mask2] \n\t"
  6361. "psrlw %[temp], %[temp], %[seven] \n\t"
  6362. "psrlw %[r_vec], %[mask1], %[eight] \n\t"
  6363. "and %[r_vec], %[temp], %[r_vec] \n\t"
  6364. "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
  6365. "or %[b_vec], %[b_vec], %[r_vec] \n\t"
  6366. "paddb %[r_vec], %[three], %[six] \n\t"
  6367. "psrlw %[temp], %[temp], %[r_vec] \n\t"
  6368. "and %[r_vec], %[temp], %[mask2] \n\t"
  6369. "paddb %[temp], %[three], %[eight] \n\t"
  6370. "psllw %[r_vec], %[r_vec], %[temp] \n\t"
  6371. "or %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Gather the low halfword of each of the four 32-bit results into one
  // 64-bit value in pixel order, then store 8 bytes.
  6372. "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
  6373. "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
  6374. "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
  6375. "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t"
  6376. "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t"
  // Advance the source/destination pointers and count down 4 pixels.
  6377. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6378. "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
  6379. "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
  6380. "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t"
  6381. "daddi %[width], %[width], -0x04 \n\t"
  6382. "bnez %[width], 1b \n\t"
  // Outputs: early-clobber FP scratch registers only.
  6383. : [y]"=&f"(y), [u]"=&f"(u),
  6384. [v]"=&f"(v),
  6385. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6386. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6387. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6388. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6389. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6390. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in GPRs, constants in FP registers.
  6391. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  6392. [v_ptr]"r"(src_v), [dst_rgb565]"r"(dst_rgb565),
  6393. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6394. [zero]"f"(0x00), [five]"f"(0x55),
  6395. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  6396. [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
  6397. [eight]"f"(0x8), [seven]"f"(0x7),
  6398. [lmove5]"f"(0x5)
  6399. : "memory"
  6400. );
  6401. }
  // NV12ToARGBRow_MMI: Loongson MMI row kernel that, per its name, converts
  // NV12 (a Y plane plus an interleaved chroma plane, src_uv) to 32-bit ARGB.
  //
  // Per loop iteration the asm reads 4 bytes from src_y and 4 bytes from
  // src_uv, stores 16 bytes to rgb_buf and subtracts 4 from width. The loop
  // only terminates when width reaches exactly zero (bnez), so width has to
  // be a positive multiple of 4.
  // Conversion coefficients are read from fixed byte offsets of yuvconstants.
  //
  // NOTE(review): the pointer and width operands are listed as asm inputs
  // ("r") but are written by daddiu/daddi inside the template. GCC's extended
  // asm rules do not allow modifying input-only operands; "+r" operands look
  // like the correct declaration -- confirm before changing.
  6402. void NV12ToARGBRow_MMI(const uint8_t* src_y,
  6403. const uint8_t* src_uv,
  6404. uint8_t* rgb_buf,
  6405. const struct YuvConstants* yuvconstants,
  6406. int width) {
  6407. uint64_t y, u, v;
  6408. uint64_t b_vec, g_vec, r_vec, temp;
  6409. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6410. __asm__ volatile(
  // Prologue: load the coefficients from yuvconstants into FP registers.
  // ub and vr are OR-ed with mask1 (0xff00ff00ff00ff00), which sets the upper
  // byte of every 16-bit lane; ug/vg/vr are widened to halfwords and one lane
  // is broadcast with pshufh (selector 0x00 or 0x55).
  6411. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6412. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6413. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  6414. "or %[ub], %[ub], %[mask1] \n\t"
  6415. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  6416. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6417. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6418. "pshufh %[ug], %[ug], %[zero] \n\t"
  6419. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6420. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6421. "pshufh %[vg], %[vg], %[five] \n\t"
  6422. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  6423. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6424. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6425. "pshufh %[vr], %[vr], %[five] \n\t"
  6426. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: 4 pixels per iteration.
  6427. "1: \n\t"
  // Unaligned word loads of Y and of the interleaved chroma bytes.
  6428. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  6429. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  6430. "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
  6431. "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
  // Widen the 4 chroma bytes to halfwords, then split them: vshu (0xf5)
  // duplicates lanes 1 and 3 into v, ushu (0xA0) duplicates lanes 0 and 2
  // into u.
  6432. "punpcklbh %[u], %[u], %[zero] \n\t"
  6433. "pshufh %[v], %[u], %[vshu] \n\t"
  6434. "pshufh %[u], %[u], %[ushu] \n\t"
  // Duplicate each Y byte into a halfword, then take the high half of the
  // unsigned product with yg.
  6435. "punpcklbh %[y], %[y], %[y] \n\t"
  6436. "pmulhuh %[y], %[y], %[yg] \n\t"
  // b = (y + bb - u * ub) >> 6, per 16-bit lane.
  6437. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6438. "pmullh %[temp], %[u], %[ub] \n\t"
  6439. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6440. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  // g = (y + bg - u * ug - v * vg) >> 6, per 16-bit lane.
  6441. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6442. "pmullh %[temp], %[u], %[ug] \n\t"
  6443. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6444. "pmullh %[temp], %[v], %[vg] \n\t"
  6445. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6446. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r = (y + br - v * vr) >> 6, per 16-bit lane.
  6447. "paddsh %[r_vec], %[y], %[br] \n\t"
  6448. "pmullh %[temp], %[v], %[vr] \n\t"
  6449. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6450. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Narrow to bytes, append the alpha operand (-1) next to G, and interleave
  // into 32-bit B,G,R,A pixels: g_vec holds pixels 0-1, b_vec pixels 2-3.
  6451. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6452. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  6453. "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
  6454. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6455. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6456. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6457. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Store the 4 pixels as two unaligned 8-byte stores (16 bytes total).
  6458. "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
  6459. "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
  6460. "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
  6461. "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
  // Advance the source/destination pointers and count down 4 pixels.
  6462. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6463. "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
  6464. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  6465. "daddi %[width], %[width], -0x04 \n\t"
  6466. "bnez %[width], 1b \n\t"
  // Outputs: early-clobber FP scratch registers only.
  6467. : [y]"=&f"(y), [u]"=&f"(u),
  6468. [v]"=&f"(v),
  6469. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6470. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6471. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6472. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6473. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6474. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in GPRs, constants in FP registers.
  6475. : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
  6476. [rgbbuf_ptr]"r"(rgb_buf),
  6477. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6478. [zero]"f"(0x00), [five]"f"(0x55),
  6479. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  6480. [ushu]"f"(0xA0), [vshu]"f"(0xf5),
  6481. [alpha]"f"(-1)
  6482. : "memory"
  6483. );
  6484. }
  // NV21ToARGBRow_MMI: Loongson MMI row kernel that, per its name, converts
  // NV21 (a Y plane plus an interleaved chroma plane, src_vu) to 32-bit ARGB.
  //
  // Per loop iteration the asm reads 4 bytes from src_y and 4 bytes from
  // src_vu, stores 16 bytes to rgb_buf and subtracts 4 from width. The loop
  // only terminates when width reaches exactly zero (bnez), so width has to
  // be a positive multiple of 4.
  // Conversion coefficients are read from fixed byte offsets of yuvconstants.
  //
  // NOTE(review): the pointer and width operands are listed as asm inputs
  // ("r") but are written by daddiu/daddi inside the template. GCC's extended
  // asm rules do not allow modifying input-only operands; "+r" operands look
  // like the correct declaration -- confirm before changing.
  6485. void NV21ToARGBRow_MMI(const uint8_t* src_y,
  6486. const uint8_t* src_vu,
  6487. uint8_t* rgb_buf,
  6488. const struct YuvConstants* yuvconstants,
  6489. int width) {
  6490. uint64_t y, u, v;
  6491. uint64_t b_vec, g_vec, r_vec, temp;
  6492. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6493. __asm__ volatile(
  // Prologue: load the coefficients from yuvconstants into FP registers.
  // ub and vr are OR-ed with mask1 (0xff00ff00ff00ff00), which sets the upper
  // byte of every 16-bit lane; ug/vg/vr are widened to halfwords and one lane
  // is broadcast with pshufh (selector 0x00 or 0x55).
  6494. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6495. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6496. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  6497. "or %[ub], %[ub], %[mask1] \n\t"
  6498. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  6499. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6500. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6501. "pshufh %[ug], %[ug], %[zero] \n\t"
  6502. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6503. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6504. "pshufh %[vg], %[vg], %[five] \n\t"
  6505. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  6506. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6507. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6508. "pshufh %[vr], %[vr], %[five] \n\t"
  6509. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: 4 pixels per iteration.
  6510. "1: \n\t"
  // Unaligned word loads of Y and of the interleaved chroma bytes.
  6511. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  6512. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  6513. "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t"
  6514. "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t"
  // Widen the 4 chroma bytes to halfwords, then split them. The selectors are
  // applied the other way round from the NV12 kernels: ushu (0xA0)
  // duplicates lanes 0 and 2 into v, vshu (0xf5) duplicates lanes 1 and 3
  // into u.
  6515. "punpcklbh %[u], %[u], %[zero] \n\t"
  6516. "pshufh %[v], %[u], %[ushu] \n\t"
  6517. "pshufh %[u], %[u], %[vshu] \n\t"
  // Duplicate each Y byte into a halfword, then take the high half of the
  // unsigned product with yg.
  6518. "punpcklbh %[y], %[y], %[y] \n\t"
  6519. "pmulhuh %[y], %[y], %[yg] \n\t"
  // b = (y + bb - u * ub) >> 6, per 16-bit lane.
  6520. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6521. "pmullh %[temp], %[u], %[ub] \n\t"
  6522. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6523. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  // g = (y + bg - u * ug - v * vg) >> 6, per 16-bit lane.
  6524. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6525. "pmullh %[temp], %[u], %[ug] \n\t"
  6526. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6527. "pmullh %[temp], %[v], %[vg] \n\t"
  6528. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6529. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r = (y + br - v * vr) >> 6, per 16-bit lane.
  6530. "paddsh %[r_vec], %[y], %[br] \n\t"
  6531. "pmullh %[temp], %[v], %[vr] \n\t"
  6532. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6533. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Narrow to bytes, append the alpha operand (-1) next to G, and interleave
  // into 32-bit B,G,R,A pixels: g_vec holds pixels 0-1, b_vec pixels 2-3.
  6534. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6535. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  6536. "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
  6537. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6538. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6539. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6540. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Store the 4 pixels as two unaligned 8-byte stores (16 bytes total).
  6541. "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
  6542. "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
  6543. "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
  6544. "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
  // Advance the source/destination pointers and count down 4 pixels.
  6545. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6546. "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t"
  6547. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  6548. "daddi %[width], %[width], -0x04 \n\t"
  6549. "bnez %[width], 1b \n\t"
  // Outputs: early-clobber FP scratch registers only.
  6550. : [y]"=&f"(y), [u]"=&f"(u),
  6551. [v]"=&f"(v),
  6552. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6553. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6554. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6555. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6556. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6557. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in GPRs, constants in FP registers.
  6558. : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
  6559. [rgbbuf_ptr]"r"(rgb_buf),
  6560. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6561. [zero]"f"(0x00), [five]"f"(0x55),
  6562. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  6563. [ushu]"f"(0xA0), [vshu]"f"(0xf5),
  6564. [alpha]"f"(-1)
  6565. : "memory"
  6566. );
  6567. }
  // NV12ToRGB24Row_MMI: Loongson MMI row kernel that, per its name, converts
  // NV12 (a Y plane plus an interleaved chroma plane, src_uv) to 24-bit RGB24.
  //
  // Per loop iteration the asm reads 4 bytes from src_y and 4 bytes from
  // src_uv, stores 12 bytes to rgb_buf and subtracts 4 from width. The loop
  // only terminates when width reaches exactly zero (bnez), so width has to
  // be a positive multiple of 4.
  // Conversion coefficients are read from fixed byte offsets of yuvconstants.
  //
  // NOTE(review): the pointer and width operands are listed as asm inputs
  // ("r") but are written by daddiu/daddi inside the template. GCC's extended
  // asm rules do not allow modifying input-only operands; "+r" operands look
  // like the correct declaration -- confirm before changing.
  // NOTE(review): the [alpha] input operand is declared but never referenced
  // by the template; it appears to be removable.
  6568. void NV12ToRGB24Row_MMI(const uint8_t* src_y,
  6569. const uint8_t* src_uv,
  6570. uint8_t* rgb_buf,
  6571. const struct YuvConstants* yuvconstants,
  6572. int width) {
  6573. uint64_t y, u, v;
  6574. uint64_t b_vec, g_vec, r_vec, temp;
  6575. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6576. __asm__ volatile(
  // Prologue: load the coefficients from yuvconstants into FP registers.
  // ub and vr are OR-ed with mask1 (0xff00ff00ff00ff00), which sets the upper
  // byte of every 16-bit lane; ug/vg/vr are widened to halfwords and one lane
  // is broadcast with pshufh (selector 0x00 or 0x55).
  6577. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6578. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6579. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  6580. "or %[ub], %[ub], %[mask1] \n\t"
  6581. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  6582. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6583. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6584. "pshufh %[ug], %[ug], %[zero] \n\t"
  6585. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6586. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6587. "pshufh %[vg], %[vg], %[five] \n\t"
  6588. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  6589. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6590. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6591. "pshufh %[vr], %[vr], %[five] \n\t"
  6592. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: 4 pixels per iteration.
  6593. "1: \n\t"
  // Unaligned word loads of Y and of the interleaved chroma bytes.
  6594. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  6595. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  6596. "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
  6597. "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
  // Widen the 4 chroma bytes to halfwords, then split them: vshu (0xf5)
  // duplicates lanes 1 and 3 into v, ushu (0xA0) duplicates lanes 0 and 2
  // into u.
  6598. "punpcklbh %[u], %[u], %[zero] \n\t"
  6599. "pshufh %[v], %[u], %[vshu] \n\t"
  6600. "pshufh %[u], %[u], %[ushu] \n\t"
  // Duplicate each Y byte into a halfword, then take the high half of the
  // unsigned product with yg.
  6601. "punpcklbh %[y], %[y], %[y] \n\t"
  6602. "pmulhuh %[y], %[y], %[yg] \n\t"
  // b = (y + bb - u * ub) >> 6, per 16-bit lane.
  6603. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6604. "pmullh %[temp], %[u], %[ub] \n\t"
  6605. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6606. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  // g = (y + bg - u * ug - v * vg) >> 6, per 16-bit lane.
  6607. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6608. "pmullh %[temp], %[u], %[ug] \n\t"
  6609. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6610. "pmullh %[temp], %[v], %[vg] \n\t"
  6611. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6612. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r = (y + br - v * vr) >> 6, per 16-bit lane.
  6613. "paddsh %[r_vec], %[y], %[br] \n\t"
  6614. "pmullh %[temp], %[v], %[vr] \n\t"
  6615. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6616. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Narrow to bytes and interleave into 32-bit B,G,R,0 pixels:
  // g_vec holds pixels 0-1, b_vec pixels 2-3.
  6617. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6618. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  6619. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6620. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6621. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6622. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Squeeze out the unused 4th byte of each pixel so 12 contiguous bytes
  // remain: g_vec ends up with output bytes 0-7 (B0 G0 R0 B1 G1 R1 B2 G2),
  // b_vec with bytes 8-11 (R2 B3 G3 R3) in its low word.
  6623. "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t"
  6624. "psllw %[temp], %[r_vec], %[lmove1] \n\t"
  6625. "or %[g_vec], %[g_vec], %[temp] \n\t"
  6626. "psrlw %[temp], %[r_vec], %[rmove1] \n\t"
  6627. "pextrh %[temp], %[temp], %[zero] \n\t"
  6628. "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t"
  6629. "pextrh %[temp], %[b_vec], %[zero] \n\t"
  6630. "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t"
  6631. "pextrh %[temp], %[b_vec], %[one] \n\t"
  6632. "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t"
  6633. "psllw %[b_vec], %[b_vec], %[rmove1] \n\t"
  6634. "or %[b_vec], %[b_vec], %[temp] \n\t"
  // Store 8 bytes followed by 4 bytes (unaligned store pairs).
  6635. "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
  6636. "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
  6637. "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t"
  6638. "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
  // Advance the source/destination pointers and count down 4 pixels.
  6639. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6640. "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
  6641. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t"
  6642. "daddi %[width], %[width], -0x04 \n\t"
  6643. "bnez %[width], 1b \n\t"
  // Outputs: early-clobber FP scratch registers only.
  6644. : [y]"=&f"(y), [u]"=&f"(u),
  6645. [v]"=&f"(v),
  6646. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6647. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6648. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6649. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6650. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6651. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in GPRs, constants in FP registers.
  6652. : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
  6653. [rgbbuf_ptr]"r"(rgb_buf),
  6654. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6655. [zero]"f"(0x00), [five]"f"(0x55),
  6656. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  6657. [ushu]"f"(0xA0), [vshu]"f"(0xf5),
  6658. [alpha]"f"(-1), [lmove1]"f"(0x18),
  6659. [one]"f"(0x1), [rmove1]"f"(0x8)
  6660. : "memory"
  6661. );
  6662. }
  // NV21ToRGB24Row_MMI: Loongson MMI row kernel that, per its name, converts
  // NV21 (a Y plane plus an interleaved chroma plane, src_vu) to 24-bit RGB24.
  //
  // Per loop iteration the asm reads 4 bytes from src_y and 4 bytes from
  // src_vu, stores 12 bytes to rgb_buf and subtracts 4 from width. The loop
  // only terminates when width reaches exactly zero (bnez), so width has to
  // be a positive multiple of 4.
  // Conversion coefficients are read from fixed byte offsets of yuvconstants.
  //
  // NOTE(review): the pointer and width operands are listed as asm inputs
  // ("r") but are written by daddiu/daddi inside the template. GCC's extended
  // asm rules do not allow modifying input-only operands; "+r" operands look
  // like the correct declaration -- confirm before changing.
  6663. void NV21ToRGB24Row_MMI(const uint8_t* src_y,
  6664. const uint8_t* src_vu,
  6665. uint8_t* rgb_buf,
  6666. const struct YuvConstants* yuvconstants,
  6667. int width) {
  6668. uint64_t y, u, v;
  6669. uint64_t b_vec, g_vec, r_vec, temp;
  6670. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6671. __asm__ volatile(
  // Prologue: load the coefficients from yuvconstants into FP registers.
  // ub and vr are OR-ed with mask1 (0xff00ff00ff00ff00), which sets the upper
  // byte of every 16-bit lane; ug/vg/vr are widened to halfwords and one lane
  // is broadcast with pshufh (selector 0x00 or 0x55).
  6672. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6673. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6674. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  6675. "or %[ub], %[ub], %[mask1] \n\t"
  6676. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  6677. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6678. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6679. "pshufh %[ug], %[ug], %[zero] \n\t"
  6680. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6681. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6682. "pshufh %[vg], %[vg], %[five] \n\t"
  6683. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  6684. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6685. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6686. "pshufh %[vr], %[vr], %[five] \n\t"
  6687. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: 4 pixels per iteration.
  6688. "1: \n\t"
  // Unaligned word loads of Y and of the interleaved chroma bytes.
  6689. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  6690. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  6691. "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t"
  6692. "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t"
  // Widen the 4 chroma bytes to halfwords, then split them. The selectors are
  // applied the other way round from the NV12 kernels: ushu (0xA0)
  // duplicates lanes 0 and 2 into v, vshu (0xf5) duplicates lanes 1 and 3
  // into u.
  6693. "punpcklbh %[u], %[u], %[zero] \n\t"
  6694. "pshufh %[v], %[u], %[ushu] \n\t"
  6695. "pshufh %[u], %[u], %[vshu] \n\t"
  // Duplicate each Y byte into a halfword, then take the high half of the
  // unsigned product with yg.
  6696. "punpcklbh %[y], %[y], %[y] \n\t"
  6697. "pmulhuh %[y], %[y], %[yg] \n\t"
  // b = (y + bb - u * ub) >> 6, per 16-bit lane.
  6698. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6699. "pmullh %[temp], %[u], %[ub] \n\t"
  6700. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6701. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  // g = (y + bg - u * ug - v * vg) >> 6, per 16-bit lane.
  6702. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6703. "pmullh %[temp], %[u], %[ug] \n\t"
  6704. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6705. "pmullh %[temp], %[v], %[vg] \n\t"
  6706. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6707. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r = (y + br - v * vr) >> 6, per 16-bit lane.
  6708. "paddsh %[r_vec], %[y], %[br] \n\t"
  6709. "pmullh %[temp], %[v], %[vr] \n\t"
  6710. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6711. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Narrow to bytes and interleave into 32-bit B,G,R,0 pixels:
  // g_vec holds pixels 0-1, b_vec pixels 2-3.
  6712. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6713. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  6714. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6715. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6716. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6717. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Squeeze out the unused 4th byte of each pixel so 12 contiguous bytes
  // remain: g_vec ends up with output bytes 0-7 (B0 G0 R0 B1 G1 R1 B2 G2),
  // b_vec with bytes 8-11 (R2 B3 G3 R3) in its low word.
  6718. "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t"
  6719. "psllw %[temp], %[r_vec], %[lmove1] \n\t"
  6720. "or %[g_vec], %[g_vec], %[temp] \n\t"
  6721. "psrlw %[temp], %[r_vec], %[rmove1] \n\t"
  6722. "pextrh %[temp], %[temp], %[zero] \n\t"
  6723. "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t"
  6724. "pextrh %[temp], %[b_vec], %[zero] \n\t"
  6725. "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t"
  6726. "pextrh %[temp], %[b_vec], %[one] \n\t"
  6727. "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t"
  6728. "psllw %[b_vec], %[b_vec], %[rmove1] \n\t"
  6729. "or %[b_vec], %[b_vec], %[temp] \n\t"
  // Store 8 bytes followed by 4 bytes (unaligned store pairs).
  6730. "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
  6731. "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
  6732. "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t"
  6733. "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
  // Advance the source/destination pointers and count down 4 pixels.
  6734. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6735. "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t"
  6736. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t"
  6737. "daddi %[width], %[width], -0x04 \n\t"
  6738. "bnez %[width], 1b \n\t"
  // Outputs: early-clobber FP scratch registers only.
  6739. : [y]"=&f"(y), [u]"=&f"(u),
  6740. [v]"=&f"(v),
  6741. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6742. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6743. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6744. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6745. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6746. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in GPRs, constants in FP registers.
  6747. : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
  6748. [rgbbuf_ptr]"r"(rgb_buf),
  6749. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6750. [zero]"f"(0x00), [five]"f"(0x55),
  6751. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  6752. [ushu]"f"(0xA0), [vshu]"f"(0xf5),
  6753. [lmove1]"f"(0x18), [rmove1]"f"(0x8),
  6754. [one]"f"(0x1)
  6755. : "memory"
  6756. );
  6757. }
  // Converts one row of NV12 (a luma row plus a row of interleaved chroma
  // bytes) to 16-bit RGB565 with Loongson MMI inline assembly, producing
  // 4 pixels per loop iteration.
  //
  // src_y:        luma row; 4 bytes are consumed per iteration.
  // src_uv:       chroma row; 4 bytes are consumed per iteration. Bytes 0 and
  //               2 feed the `u` operand, bytes 1 and 3 feed `v`, and each
  //               sample is shared by two adjacent pixels.
  // dst_rgb565:   output row; 8 bytes (4 x 16-bit pixels) are stored per
  //               iteration with unaligned stores.
  // yuvconstants: coefficient table, read at byte offsets 0x00, 0x20, 0x40,
  //               0x60, 0x80, 0xa0 and 0xc0 (presumably the UV coefficients,
  //               the B/G/R biases and the Y gain - confirm against the
  //               YuvConstants definition).
  // width:        pixel count. The loop subtracts 4 and exits only when the
  //               counter is exactly zero, so it has to be a positive
  //               multiple of 4.
  //
  // NOTE(review): y_ptr, uv_ptr, dst_rgb565 and width are declared as
  // input-only operands but are written by the asm, which GCC's extended-asm
  // rules do not allow. The statement already has 30 operands, so switching
  // them to "+r" would need restructuring - confirm before changing.
  6758. void NV12ToRGB565Row_MMI(const uint8_t* src_y,
  6759. const uint8_t* src_uv,
  6760. uint8_t* dst_rgb565,
  6761. const struct YuvConstants* yuvconstants,
  6762. int width) {
  6763. uint64_t y, u, v;
  6764. uint64_t b_vec, g_vec, r_vec, temp;
  6765. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6766. __asm__ volatile(
  // Loop-invariant setup: load the coefficient vectors from yuvconstants.
  6767. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6768. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6769. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  // mask1 forces the high byte of every halfword of ub to 0xff.
  6770. "or %[ub], %[ub], %[mask1] \n\t"
  6771. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  // ug = byte 0 at offset 0x20, zero-extended to a halfword and broadcast to
  // all four lanes (shuffle selector 0x00).
  6772. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6773. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6774. "pshufh %[ug], %[ug], %[zero] \n\t"
  // vg = byte 1 at offset 0x20, broadcast the same way (selector 0x55).
  6775. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6776. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6777. "pshufh %[vg], %[vg], %[five] \n\t"
  6778. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  // vr = byte 1 at offset 0x40, broadcast, then high bytes forced to 0xff.
  6779. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6780. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6781. "pshufh %[vr], %[vr], %[five] \n\t"
  6782. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: one iteration converts 4 pixels.
  6783. "1: \n\t"
  // Unaligned 32-bit loads of 4 luma bytes and 4 chroma bytes.
  6784. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  6785. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  6786. "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
  6787. "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
  // Widen chroma bytes to halfwords, then split them: vshu (0xf5) selects
  // halfwords 1,1,3,3 into v and ushu (0xA0) selects halfwords 0,0,2,2 into u.
  6788. "punpcklbh %[u], %[u], %[zero] \n\t"
  6789. "pshufh %[v], %[u], %[vshu] \n\t"
  6790. "pshufh %[u], %[u], %[ushu] \n\t"
  // Duplicate each luma byte into both bytes of a halfword, then keep the
  // high 16 bits of the unsigned product with yg.
  6791. "punpcklbh %[y], %[y], %[y] \n\t"
  6792. "pmulhuh %[y], %[y], %[yg] \n\t"
  // b_vec = (y + bb - u * ub) >> 6, with saturating 16-bit add/subtract.
  6793. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6794. "pmullh %[temp], %[u], %[ub] \n\t"
  6795. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6796. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  // g_vec = (y + bg - u * ug - v * vg) >> 6.
  6797. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6798. "pmullh %[temp], %[u], %[ug] \n\t"
  6799. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6800. "pmullh %[temp], %[v], %[vg] \n\t"
  6801. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6802. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r_vec = (y + br - v * vr) >> 6.
  6803. "paddsh %[r_vec], %[y], %[br] \n\t"
  6804. "pmullh %[temp], %[v], %[vr] \n\t"
  6805. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6806. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Saturate to unsigned bytes: r_vec = 4 blue bytes then 4 red bytes,
  // g_vec = 4 green bytes then 4 zero bytes.
  6807. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6808. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  // Interleave so each 32-bit word holds the bytes B, G, R, 0 of one pixel:
  // g_vec = pixels 0-1, b_vec = pixels 2-3.
  6809. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6810. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6811. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6812. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Pack pixels 0-1 to 5:6:5. After the halfword shift by 3, the low
  // halfword of each word is (G << 5) | (B >> 3) and the high one is R >> 3.
  6813. "psrlh %[temp], %[g_vec], %[three] \n\t"
  // mask2 keeps bits 0..4 of each word: the 5-bit blue field.
  6814. "and %[g_vec], %[temp], %[mask2] \n\t"
  // Word shift by 7 brings the top 6 green bits to bits 0..5 and the 5 red
  // bits to bits 9..13; mask1 >> 8 (0x00ff00ff per word) isolates green.
  6815. "psrlw %[temp], %[temp], %[seven] \n\t"
  6816. "psrlw %[r_vec], %[mask1], %[eight] \n\t"
  6817. "and %[r_vec], %[temp], %[r_vec] \n\t"
  // y is reused as scratch here: 8 - 3 = 5 is the left shift for green.
  6818. "psubb %[y], %[eight], %[three] \n\t"//5
  6819. "psllw %[r_vec], %[r_vec], %[y] \n\t"
  6820. "or %[g_vec], %[g_vec], %[r_vec] \n\t"
  // 3 + 6 = 9: a further right shift that brings red down to bits 0..4.
  6821. "paddb %[r_vec], %[three], %[six] \n\t"
  6822. "psrlw %[temp], %[temp], %[r_vec] \n\t"
  6823. "and %[r_vec], %[temp], %[mask2] \n\t"
  // 3 + 8 = 11: the left shift that places red in bits 11..15.
  6824. "paddb %[temp], %[three], %[eight] \n\t"
  6825. "psllw %[r_vec], %[r_vec], %[temp] \n\t"
  6826. "or %[g_vec], %[g_vec], %[r_vec] \n\t"
  // Same 5:6:5 packing for pixels 2-3 held in b_vec.
  6827. "psrlh %[temp], %[b_vec], %[three] \n\t"
  6828. "and %[b_vec], %[temp], %[mask2] \n\t"
  6829. "psrlw %[temp], %[temp], %[seven] \n\t"
  6830. "psrlw %[r_vec], %[mask1], %[eight] \n\t"
  6831. "and %[r_vec], %[temp], %[r_vec] \n\t"
  6832. "psubb %[y], %[eight], %[three] \n\t"//5
  6833. "psllw %[r_vec], %[r_vec], %[y] \n\t"
  6834. "or %[b_vec], %[b_vec], %[r_vec] \n\t"
  6835. "paddb %[r_vec], %[three], %[six] \n\t"
  6836. "psrlw %[temp], %[temp], %[r_vec] \n\t"
  6837. "and %[r_vec], %[temp], %[mask2] \n\t"
  6838. "paddb %[temp], %[three], %[eight] \n\t"
  6839. "psllw %[r_vec], %[r_vec], %[temp] \n\t"
  6840. "or %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Each word now carries its 16-bit pixel in the low halfword; gather the
  // four low halfwords into g_vec in pixel order 0,1,2,3.
  6841. "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
  6842. "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
  6843. "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
  // Unaligned 64-bit store of the 4 RGB565 pixels.
  6844. "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t"
  6845. "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t"
  // Advance the pointers and loop until width reaches zero.
  6846. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  6847. "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
  6848. "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t"
  6849. "daddi %[width], %[width], -0x04 \n\t"
  6850. "bnez %[width], 1b \n\t"
  // Outputs: scratch and coefficient registers, all early-clobber.
  6851. : [y]"=&f"(y), [u]"=&f"(u),
  6852. [v]"=&f"(v),
  6853. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6854. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6855. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6856. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6857. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6858. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in general registers, constants in FP
  // registers (shift counts, shuffle selectors and bit masks).
  6859. : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
  6860. [dst_rgb565]"r"(dst_rgb565),
  6861. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6862. [zero]"f"(0x00), [five]"f"(0x55),
  6863. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  6864. [ushu]"f"(0xA0), [vshu]"f"(0xf5),
  6865. [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
  6866. [eight]"f"(0x8), [seven]"f"(0x7)
  6867. : "memory"
  6868. );
  6869. }
  // Converts one row of packed YUY2 to 32-bit pixels with Loongson MMI inline
  // assembly, 4 pixels per loop iteration. Output bytes per pixel are stored
  // in the order B, G, R, 0xff.
  //
  // src_yuy2:     packed source row; 8 bytes are consumed per iteration. The
  //               even bytes are used as luma; odd bytes 1 and 5 feed the `u`
  //               operand and odd bytes 3 and 7 feed `v`, each shared by two
  //               adjacent pixels.
  // rgb_buf:      output row; 16 bytes are stored per iteration with
  //               unaligned stores.
  // yuvconstants: coefficient table, read at byte offsets 0x00, 0x20, 0x40,
  //               0x60, 0x80, 0xa0 and 0xc0 (presumably the UV coefficients,
  //               the B/G/R biases and the Y gain - confirm against the
  //               YuvConstants definition).
  // width:        pixel count. The loop subtracts 4 and exits only when the
  //               counter is exactly zero, so it has to be a positive
  //               multiple of 4.
  //
  // NOTE(review): yuy2_ptr, rgbbuf_ptr and width are declared as input-only
  // operands but are written by the asm, which GCC's extended-asm rules do
  // not allow - confirm and consider "+r" operands.
  6870. void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
  6871. uint8_t* rgb_buf,
  6872. const struct YuvConstants* yuvconstants,
  6873. int width) {
  6874. uint64_t y, u, v;
  6875. uint64_t b_vec, g_vec, r_vec, temp;
  6876. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6877. __asm__ volatile(
  // Loop-invariant setup: load the coefficient vectors from yuvconstants.
  6878. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6879. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6880. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  // mask1 forces the high byte of every halfword of ub to 0xff.
  6881. "or %[ub], %[ub], %[mask1] \n\t"
  6882. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  // ug = byte 0 at offset 0x20, zero-extended to a halfword and broadcast to
  // all four lanes (shuffle selector 0x00).
  6883. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6884. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6885. "pshufh %[ug], %[ug], %[zero] \n\t"
  // vg = byte 1 at offset 0x20, broadcast the same way (selector 0x55).
  6886. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6887. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6888. "pshufh %[vg], %[vg], %[five] \n\t"
  6889. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  // vr = byte 1 at offset 0x40, broadcast, then high bytes forced to 0xff.
  6890. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6891. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6892. "pshufh %[vr], %[vr], %[five] \n\t"
  6893. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: one iteration converts 4 pixels.
  6894. "1: \n\t"
  // Unaligned 64-bit load of 8 packed source bytes.
  6895. "gsldlc1 %[y], 0x07(%[yuy2_ptr]) \n\t"
  6896. "gsldrc1 %[y], 0x00(%[yuy2_ptr]) \n\t"
  // Halfword shift by 8 leaves the odd (chroma) byte of each halfword; ushu
  // (0xA0) selects halfwords 0,0,2,2 and vshu (0xf5) selects 1,1,3,3.
  6897. "psrlh %[temp], %[y], %[eight] \n\t"
  6898. "pshufh %[u], %[temp], %[ushu] \n\t"
  6899. "pshufh %[v], %[temp], %[vshu] \n\t"
  // mask1 >> 8 is 0x00ff per halfword: keep the even (luma) bytes.
  6900. "psrlh %[temp], %[mask1], %[eight] \n\t"
  6901. "and %[y], %[y], %[temp] \n\t"
  // Duplicate each luma byte into both bytes of its halfword, then keep the
  // high 16 bits of the unsigned product with yg.
  6902. "psllh %[temp], %[y], %[eight] \n\t"
  6903. "or %[y], %[y], %[temp] \n\t"
  6904. "pmulhuh %[y], %[y], %[yg] \n\t"
  // b_vec = (y + bb - u * ub) >> 6, with saturating 16-bit add/subtract.
  6905. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6906. "pmullh %[temp], %[u], %[ub] \n\t"
  6907. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6908. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  // g_vec = (y + bg - u * ug - v * vg) >> 6.
  6909. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6910. "pmullh %[temp], %[u], %[ug] \n\t"
  6911. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6912. "pmullh %[temp], %[v], %[vg] \n\t"
  6913. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6914. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r_vec = (y + br - v * vr) >> 6.
  6915. "paddsh %[r_vec], %[y], %[br] \n\t"
  6916. "pmullh %[temp], %[v], %[vr] \n\t"
  6917. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6918. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Saturate to unsigned bytes: r_vec = 4 blue bytes then 4 red bytes,
  // g_vec = 4 green bytes then 4 zero bytes.
  6919. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  6920. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  // Replace the upper word of g_vec with the low word of alpha (-1), giving
  // 4 green bytes followed by 4 bytes of 0xff.
  6921. "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
  // Interleave so each 32-bit word holds B, G, R, 0xff for one pixel:
  // g_vec = pixels 0-1, b_vec = pixels 2-3.
  6922. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  6923. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  6924. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  6925. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Two unaligned 64-bit stores: 16 output bytes.
  6926. "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
  6927. "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
  6928. "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
  6929. "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
  // Advance the pointers and loop until width reaches zero.
  6930. "daddiu %[yuy2_ptr], %[yuy2_ptr], 0x08 \n\t"
  6931. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  6932. "daddi %[width], %[width], -0x04 \n\t"
  6933. "bnez %[width], 1b \n\t"
  // Outputs: scratch and coefficient registers, all early-clobber.
  6934. : [y]"=&f"(y), [u]"=&f"(u),
  6935. [v]"=&f"(v),
  6936. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  6937. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  6938. [ub]"=&f"(ub), [ug]"=&f"(ug),
  6939. [vg]"=&f"(vg), [vr]"=&f"(vr),
  6940. [bb]"=&f"(bb), [bg]"=&f"(bg),
  6941. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in general registers, constants in FP
  // registers (shift counts, shuffle selectors, mask and alpha fill).
  6942. : [yuy2_ptr]"r"(src_yuy2), [rgbbuf_ptr]"r"(rgb_buf),
  6943. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  6944. [zero]"f"(0x00), [five]"f"(0x55),
  6945. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  6946. [ushu]"f"(0xA0), [vshu]"f"(0xf5),
  6947. [alpha]"f"(-1), [eight]"f"(0x8)
  6948. : "memory"
  6949. );
  6950. }
  // Converts one row of packed UYVY to 32-bit pixels with Loongson MMI inline
  // assembly, 4 pixels per loop iteration. Output bytes per pixel are stored
  // in the order B, G, R, 0xff.
  //
  // src_uyvy:     packed source row; 8 bytes are consumed per iteration. The
  //               odd bytes are used as luma; even bytes 0 and 4 feed the `u`
  //               operand and even bytes 2 and 6 feed `v`, each shared by two
  //               adjacent pixels.
  // rgb_buf:      output row; 16 bytes are stored per iteration with
  //               unaligned stores.
  // yuvconstants: coefficient table, read at byte offsets 0x00, 0x20, 0x40,
  //               0x60, 0x80, 0xa0 and 0xc0 (presumably the UV coefficients,
  //               the B/G/R biases and the Y gain - confirm against the
  //               YuvConstants definition).
  // width:        pixel count. The loop subtracts 4 and exits only when the
  //               counter is exactly zero, so it has to be a positive
  //               multiple of 4.
  //
  // NOTE(review): uyvy_ptr, rgbbuf_ptr and width are declared as input-only
  // operands but are written by the asm, which GCC's extended-asm rules do
  // not allow - confirm and consider "+r" operands.
  6951. void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
  6952. uint8_t* rgb_buf,
  6953. const struct YuvConstants* yuvconstants,
  6954. int width) {
  6955. uint64_t y, u, v;
  6956. uint64_t b_vec, g_vec, r_vec, temp;
  6957. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  6958. __asm__ volatile(
  // Loop-invariant setup: load the coefficient vectors from yuvconstants.
  6959. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  6960. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  6961. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  // mask1 forces the high byte of every halfword of ub to 0xff.
  6962. "or %[ub], %[ub], %[mask1] \n\t"
  6963. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  // ug = byte 0 at offset 0x20, zero-extended to a halfword and broadcast to
  // all four lanes (shuffle selector 0x00).
  6964. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  6965. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  6966. "pshufh %[ug], %[ug], %[zero] \n\t"
  // vg = byte 1 at offset 0x20, broadcast the same way (selector 0x55).
  6967. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  6968. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  6969. "pshufh %[vg], %[vg], %[five] \n\t"
  6970. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  // vr = byte 1 at offset 0x40, broadcast, then high bytes forced to 0xff.
  6971. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  6972. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  6973. "pshufh %[vr], %[vr], %[five] \n\t"
  6974. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: one iteration converts 4 pixels.
  6975. "1: \n\t"
  // Unaligned 64-bit load of 8 packed source bytes.
  6976. "gsldlc1 %[y], 0x07(%[uyvy_ptr]) \n\t"
  6977. "gsldrc1 %[y], 0x00(%[uyvy_ptr]) \n\t"
  // mask1 >> 8 is 0x00ff per halfword: keep the even (chroma) bytes, then
  // ushu (0xA0) selects halfwords 0,0,2,2 and vshu (0xf5) selects 1,1,3,3.
  6978. "psrlh %[temp], %[mask1], %[eight] \n\t"
  6979. "and %[temp], %[y], %[temp] \n\t"
  6980. "pshufh %[u], %[temp], %[ushu] \n\t"
  6981. "pshufh %[v], %[temp], %[vshu] \n\t"
  // Halfword shift by 8 leaves the odd (luma) bytes; duplicate each into
  // both bytes of its halfword and keep the high 16 bits of the unsigned
  // product with yg.
  6982. "psrlh %[y], %[y], %[eight] \n\t"
  6983. "psllh %[temp], %[y], %[eight] \n\t"
  6984. "or %[y], %[y], %[temp] \n\t"
  6985. "pmulhuh %[y], %[y], %[yg] \n\t"
  // b_vec = (y + bb - u * ub) >> 6, with saturating 16-bit add/subtract.
  6986. "paddsh %[b_vec], %[y], %[bb] \n\t"
  6987. "pmullh %[temp], %[u], %[ub] \n\t"
  6988. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  6989. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  // g_vec = (y + bg - u * ug - v * vg) >> 6.
  6990. "paddsh %[g_vec], %[y], %[bg] \n\t"
  6991. "pmullh %[temp], %[u], %[ug] \n\t"
  6992. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6993. "pmullh %[temp], %[v], %[vg] \n\t"
  6994. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  6995. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r_vec = (y + br - v * vr) >> 6.
  6996. "paddsh %[r_vec], %[y], %[br] \n\t"
  6997. "pmullh %[temp], %[v], %[vr] \n\t"
  6998. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  6999. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Saturate to unsigned bytes: r_vec = 4 blue bytes then 4 red bytes,
  // g_vec = 4 green bytes then 4 zero bytes.
  7000. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  7001. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  // Replace the upper word of g_vec with the low word of alpha (-1), giving
  // 4 green bytes followed by 4 bytes of 0xff.
  7002. "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
  // Interleave so each 32-bit word holds B, G, R, 0xff for one pixel:
  // g_vec = pixels 0-1, b_vec = pixels 2-3.
  7003. "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
  7004. "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
  7005. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  7006. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Two unaligned 64-bit stores: 16 output bytes.
  7007. "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
  7008. "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
  7009. "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
  7010. "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
  // Advance the pointers and loop until width reaches zero.
  7011. "daddiu %[uyvy_ptr], %[uyvy_ptr], 0x08 \n\t"
  7012. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  7013. "daddi %[width], %[width], -0x04 \n\t"
  7014. "bnez %[width], 1b \n\t"
  // Outputs: scratch and coefficient registers, all early-clobber.
  7015. : [y]"=&f"(y), [u]"=&f"(u),
  7016. [v]"=&f"(v),
  7017. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  7018. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  7019. [ub]"=&f"(ub), [ug]"=&f"(ug),
  7020. [vg]"=&f"(vg), [vr]"=&f"(vr),
  7021. [bb]"=&f"(bb), [bg]"=&f"(bg),
  7022. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in general registers, constants in FP
  // registers (shift counts, shuffle selectors, mask and alpha fill).
  7023. : [uyvy_ptr]"r"(src_uyvy), [rgbbuf_ptr]"r"(rgb_buf),
  7024. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  7025. [zero]"f"(0x00), [five]"f"(0x55),
  7026. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  7027. [ushu]"f"(0xA0), [vshu]"f"(0xf5),
  7028. [alpha]"f"(-1), [eight]"f"(0x8)
  7029. : "memory"
  7030. );
  7031. }
  // Converts one row of planar Y, U and V samples to 32-bit pixels with
  // Loongson MMI inline assembly, 4 pixels per loop iteration. Output bytes
  // per pixel are stored in the order 0xff, B, G, R.
  //
  // src_y:        luma row; 4 bytes are consumed per iteration.
  // src_u, src_v: chroma rows; 2 bytes of each are consumed per iteration and
  //               every sample is shared by two adjacent pixels.
  // rgb_buf:      output row; 16 bytes are stored per iteration with
  //               unaligned stores.
  // yuvconstants: coefficient table, read at byte offsets 0x00, 0x20, 0x40,
  //               0x60, 0x80, 0xa0 and 0xc0 (presumably the UV coefficients,
  //               the B/G/R biases and the Y gain - confirm against the
  //               YuvConstants definition).
  // width:        pixel count. The loop subtracts 4 and exits only when the
  //               counter is exactly zero, so it has to be a positive
  //               multiple of 4.
  //
  // NOTE(review): each iteration loads 4 bytes from u_ptr and v_ptr but only
  // advances them by 2, so the final iteration reads 2 bytes past the chroma
  // samples it uses - confirm the buffers tolerate that.
  // NOTE(review): y_ptr, u_ptr, v_ptr, rgbbuf_ptr and width are declared as
  // input-only operands but are written by the asm, which GCC's extended-asm
  // rules do not allow. With 26 operands already, switching all five to "+r"
  // would exceed GCC's 30-operand limit - confirm before changing.
  7032. void I422ToRGBARow_MMI(const uint8_t* src_y,
  7033. const uint8_t* src_u,
  7034. const uint8_t* src_v,
  7035. uint8_t* rgb_buf,
  7036. const struct YuvConstants* yuvconstants,
  7037. int width) {
  7038. uint64_t y, u, v;
  7039. uint64_t b_vec, g_vec, r_vec, temp;
  7040. uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  7041. __asm__ volatile(
  // Loop-invariant setup: load the coefficient vectors from yuvconstants.
  7042. "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
  7043. "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
  7044. "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
  // mask1 forces the high byte of every halfword of ub to 0xff.
  7045. "or %[ub], %[ub], %[mask1] \n\t"
  7046. "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
  // ug = byte 0 at offset 0x20, zero-extended to a halfword and broadcast to
  // all four lanes (shuffle selector 0x00).
  7047. "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
  7048. "punpcklbh %[ug], %[ug], %[zero] \n\t"
  7049. "pshufh %[ug], %[ug], %[zero] \n\t"
  // vg = byte 1 at offset 0x20, broadcast the same way (selector 0x55).
  7050. "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
  7051. "punpcklbh %[vg], %[vg], %[zero] \n\t"
  7052. "pshufh %[vg], %[vg], %[five] \n\t"
  7053. "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
  // vr = byte 1 at offset 0x40, broadcast, then high bytes forced to 0xff.
  7054. "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
  7055. "punpcklbh %[vr], %[vr], %[zero] \n\t"
  7056. "pshufh %[vr], %[vr], %[five] \n\t"
  7057. "or %[vr], %[vr], %[mask1] \n\t"
  // Main loop: one iteration converts 4 pixels.
  7058. "1: \n\t"
  // Unaligned 32-bit loads from each of the three planes.
  7059. "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
  7060. "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
  7061. "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
  7062. "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
  7063. "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
  7064. "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
  // Duplicate each luma byte into both bytes of a halfword, then keep the
  // high 16 bits of the unsigned product with yg.
  7065. "punpcklbh %[y], %[y], %[y] \n\t"
  7066. "pmulhuh %[y], %[y], %[yg] \n\t"
  // Double each U byte, then zero-extend the low four bytes: the halfwords
  // become U0, U0, U1, U1.
  7067. "punpcklbh %[u], %[u], %[u] \n\t"
  7068. "punpcklbh %[u], %[u], %[zero] \n\t"
  // b_vec = (y + bb - u * ub) >> 6, with saturating 16-bit add/subtract.
  7069. "paddsh %[b_vec], %[y], %[bb] \n\t"
  7070. "pmullh %[temp], %[u], %[ub] \n\t"
  7071. "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
  7072. "psrah %[b_vec], %[b_vec], %[six] \n\t"
  // Same widening for V: the halfwords become V0, V0, V1, V1.
  7073. "punpcklbh %[v], %[v], %[v] \n\t"
  7074. "punpcklbh %[v], %[v], %[zero] \n\t"
  // g_vec = (y + bg - u * ug - v * vg) >> 6.
  7075. "paddsh %[g_vec], %[y], %[bg] \n\t"
  7076. "pmullh %[temp], %[u], %[ug] \n\t"
  7077. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  7078. "pmullh %[temp], %[v], %[vg] \n\t"
  7079. "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
  7080. "psrah %[g_vec], %[g_vec], %[six] \n\t"
  // r_vec = (y + br - v * vr) >> 6.
  7081. "paddsh %[r_vec], %[y], %[br] \n\t"
  7082. "pmullh %[temp], %[v], %[vr] \n\t"
  7083. "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
  7084. "psrah %[r_vec], %[r_vec], %[six] \n\t"
  // Saturate to unsigned bytes: r_vec = 4 blue bytes then 4 red bytes,
  // g_vec = 4 green bytes then 4 zero bytes.
  7085. "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
  7086. "packushb %[g_vec], %[g_vec], %[zero] \n\t"
  // g_vec = low word of alpha (-1) followed by the 4 green bytes, so the
  // 0xff fill ends up as the first byte of every pixel.
  7087. "punpcklwd %[g_vec], %[alpha], %[g_vec] \n\t"
  // Interleave so each 32-bit word holds 0xff, B, G, R for one pixel:
  // g_vec = pixels 0-1, b_vec = pixels 2-3.
  7088. "punpcklbh %[b_vec], %[g_vec], %[r_vec] \n\t"
  7089. "punpckhbh %[r_vec], %[g_vec], %[r_vec] \n\t"
  7090. "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
  7091. "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
  // Two unaligned 64-bit stores: 16 output bytes.
  7092. "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
  7093. "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
  7094. "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
  7095. "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
  // Advance: 4 luma bytes, 2 bytes of each chroma plane, 16 output bytes.
  7096. "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
  7097. "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
  7098. "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
  7099. "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
  7100. "daddi %[width], %[width], -0x04 \n\t"
  7101. "bnez %[width], 1b \n\t"
  // Outputs: scratch and coefficient registers, all early-clobber.
  7102. : [y]"=&f"(y), [u]"=&f"(u),
  7103. [v]"=&f"(v),
  7104. [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
  7105. [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
  7106. [ub]"=&f"(ub), [ug]"=&f"(ug),
  7107. [vg]"=&f"(vg), [vr]"=&f"(vr),
  7108. [bb]"=&f"(bb), [bg]"=&f"(bg),
  7109. [br]"=&f"(br), [yg]"=&f"(yg)
  // Inputs: pointers and width in general registers, constants in FP
  // registers (shift count, shuffle selectors, mask and alpha fill).
  7110. : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
  7111. [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
  7112. [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
  7113. [zero]"f"(0x00), [five]"f"(0x55),
  7114. [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
  7115. [alpha]"f"(-1)
  7116. : "memory"
  7117. );
  7118. }
  // Fills a row of 32-bit pixels with the value v32 using Loongson MMI
  // unaligned 64-bit stores, 4 pixels (16 bytes) per loop iteration.
  //
  // dst_argb: destination row; no alignment is required.
  // v32:      32-bit pixel value written to every pixel.
  // width:    pixel count. The loop subtracts 4 and exits only when the
  //           counter is exactly zero, so it has to be a positive multiple
  //           of 4.
  //
  // dst_ptr and width are updated by the loop, so they are declared as
  // read-write ("+&r") operands; GCC does not allow an asm statement to
  // modify input-only operands.
  7119. void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
  7120. __asm__ volatile (
  // Copy the low 32-bit word into the high word: v32 now holds two pixels.
  7121. "punpcklwd %[v32], %[v32], %[v32] \n\t"
  7122. "1: \n\t"
  // Two unaligned 64-bit stores write pixels 0-1 and 2-3.
  7123. "gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t"
  7124. "gssdrc1 %[v32], 0x00(%[dst_ptr]) \n\t"
  7125. "gssdlc1 %[v32], 0x0f(%[dst_ptr]) \n\t"
  7126. "gssdrc1 %[v32], 0x08(%[dst_ptr]) \n\t"
  // Consume 4 pixels, advance 16 bytes, loop until width reaches zero.
  7127. "daddi %[width], %[width], -0x04 \n\t"
  7128. "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
  7129. "bnez %[width], 1b \n\t"
  7130. : [v32]"+&f"(v32), [dst_ptr]"+&r"(dst_argb), [width]"+&r"(width)
  7131. :
  7132. : "memory"
  7133. );
  7134. }
  7135. // clang-format on
  7136. // 10 bit YUV to ARGB
  7137. #endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
  7138. #ifdef __cplusplus
  7139. } // extern "C"
  7140. } // namespace libyuv
  7141. #endif