scale_any.cc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. /*
  2. * Copyright 2015 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <string.h> // For memset/memcpy
  11. #include "libyuv/scale.h"
  12. #include "libyuv/scale_row.h"
  13. #include "libyuv/basic_types.h"
  14. #ifdef __cplusplus
  15. namespace libyuv {
  16. extern "C" {
  17. #endif
  18. // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
  19. #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
  20. void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \
  21. int dx) { \
  22. int r = dst_width & MASK; \
  23. int n = dst_width & ~MASK; \
  24. if (n > 0) { \
  25. TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
  26. } \
  27. TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \
  28. }
  29. #ifdef HAS_SCALEFILTERCOLS_NEON
  30. CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
  31. #endif
  32. #ifdef HAS_SCALEFILTERCOLS_MSA
  33. CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15)
  34. #endif
  35. #ifdef HAS_SCALEARGBCOLS_NEON
  36. CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
  37. #endif
  38. #ifdef HAS_SCALEARGBCOLS_MSA
  39. CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
  40. #endif
  41. #ifdef HAS_SCALEARGBCOLS_MMI
  42. CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C, 4, 0)
  43. #endif
  44. #ifdef HAS_SCALEARGBFILTERCOLS_NEON
  45. CANY(ScaleARGBFilterCols_Any_NEON,
  46. ScaleARGBFilterCols_NEON,
  47. ScaleARGBFilterCols_C,
  48. 4,
  49. 3)
  50. #endif
  51. #ifdef HAS_SCALEARGBFILTERCOLS_MSA
  52. CANY(ScaleARGBFilterCols_Any_MSA,
  53. ScaleARGBFilterCols_MSA,
  54. ScaleARGBFilterCols_C,
  55. 4,
  56. 7)
  57. #endif
  58. #undef CANY
  59. // Fixed scale down.
  60. // Mask may be non-power of 2, so use MOD
  61. #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  62. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
  63. int dst_width) { \
  64. int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
  65. int n = dst_width - r; \
  66. if (n > 0) { \
  67. SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
  68. } \
  69. SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
  70. dst_ptr + n * BPP, r); \
  71. }
  72. // Fixed scale down for odd source width. Used by I420Blend subsampling.
  73. // Since dst_width is (width + 1) / 2, this function scales one less pixel
  74. // and copies the last pixel.
  75. #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  76. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
  77. int dst_width) { \
  78. int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
  79. int n = (dst_width - 1) - r; \
  80. if (n > 0) { \
  81. SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
  82. } \
  83. SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
  84. dst_ptr + n * BPP, r + 1); \
  85. }
  86. #ifdef HAS_SCALEROWDOWN2_SSSE3
  87. SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
  88. SDANY(ScaleRowDown2Linear_Any_SSSE3,
  89. ScaleRowDown2Linear_SSSE3,
  90. ScaleRowDown2Linear_C,
  91. 2,
  92. 1,
  93. 15)
  94. SDANY(ScaleRowDown2Box_Any_SSSE3,
  95. ScaleRowDown2Box_SSSE3,
  96. ScaleRowDown2Box_C,
  97. 2,
  98. 1,
  99. 15)
  100. SDODD(ScaleRowDown2Box_Odd_SSSE3,
  101. ScaleRowDown2Box_SSSE3,
  102. ScaleRowDown2Box_Odd_C,
  103. 2,
  104. 1,
  105. 15)
  106. #endif
  107. #ifdef HAS_SCALEROWDOWN2_AVX2
  108. SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
  109. SDANY(ScaleRowDown2Linear_Any_AVX2,
  110. ScaleRowDown2Linear_AVX2,
  111. ScaleRowDown2Linear_C,
  112. 2,
  113. 1,
  114. 31)
  115. SDANY(ScaleRowDown2Box_Any_AVX2,
  116. ScaleRowDown2Box_AVX2,
  117. ScaleRowDown2Box_C,
  118. 2,
  119. 1,
  120. 31)
  121. SDODD(ScaleRowDown2Box_Odd_AVX2,
  122. ScaleRowDown2Box_AVX2,
  123. ScaleRowDown2Box_Odd_C,
  124. 2,
  125. 1,
  126. 31)
  127. #endif
  128. #ifdef HAS_SCALEROWDOWN2_NEON
  129. SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
  130. SDANY(ScaleRowDown2Linear_Any_NEON,
  131. ScaleRowDown2Linear_NEON,
  132. ScaleRowDown2Linear_C,
  133. 2,
  134. 1,
  135. 15)
  136. SDANY(ScaleRowDown2Box_Any_NEON,
  137. ScaleRowDown2Box_NEON,
  138. ScaleRowDown2Box_C,
  139. 2,
  140. 1,
  141. 15)
  142. SDODD(ScaleRowDown2Box_Odd_NEON,
  143. ScaleRowDown2Box_NEON,
  144. ScaleRowDown2Box_Odd_C,
  145. 2,
  146. 1,
  147. 15)
  148. #endif
  149. #ifdef HAS_SCALEROWDOWN2_MSA
  150. SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31)
  151. SDANY(ScaleRowDown2Linear_Any_MSA,
  152. ScaleRowDown2Linear_MSA,
  153. ScaleRowDown2Linear_C,
  154. 2,
  155. 1,
  156. 31)
  157. SDANY(ScaleRowDown2Box_Any_MSA,
  158. ScaleRowDown2Box_MSA,
  159. ScaleRowDown2Box_C,
  160. 2,
  161. 1,
  162. 31)
  163. #endif
  164. #ifdef HAS_SCALEROWDOWN2_MMI
  165. SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C, 2, 1, 7)
  166. SDANY(ScaleRowDown2Linear_Any_MMI,
  167. ScaleRowDown2Linear_MMI,
  168. ScaleRowDown2Linear_C,
  169. 2,
  170. 1,
  171. 7)
  172. SDANY(ScaleRowDown2Box_Any_MMI,
  173. ScaleRowDown2Box_MMI,
  174. ScaleRowDown2Box_C,
  175. 2,
  176. 1,
  177. 7)
  178. SDODD(ScaleRowDown2Box_Odd_MMI,
  179. ScaleRowDown2Box_MMI,
  180. ScaleRowDown2Box_Odd_C,
  181. 2,
  182. 1,
  183. 7)
  184. #endif
  185. #ifdef HAS_SCALEROWDOWN4_SSSE3
  186. SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
  187. SDANY(ScaleRowDown4Box_Any_SSSE3,
  188. ScaleRowDown4Box_SSSE3,
  189. ScaleRowDown4Box_C,
  190. 4,
  191. 1,
  192. 7)
  193. #endif
  194. #ifdef HAS_SCALEROWDOWN4_AVX2
  195. SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
  196. SDANY(ScaleRowDown4Box_Any_AVX2,
  197. ScaleRowDown4Box_AVX2,
  198. ScaleRowDown4Box_C,
  199. 4,
  200. 1,
  201. 15)
  202. #endif
  203. #ifdef HAS_SCALEROWDOWN4_NEON
  204. SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
  205. SDANY(ScaleRowDown4Box_Any_NEON,
  206. ScaleRowDown4Box_NEON,
  207. ScaleRowDown4Box_C,
  208. 4,
  209. 1,
  210. 7)
  211. #endif
  212. #ifdef HAS_SCALEROWDOWN4_MSA
  213. SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15)
  214. SDANY(ScaleRowDown4Box_Any_MSA,
  215. ScaleRowDown4Box_MSA,
  216. ScaleRowDown4Box_C,
  217. 4,
  218. 1,
  219. 15)
  220. #endif
  221. #ifdef HAS_SCALEROWDOWN4_MMI
  222. SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C, 4, 1, 7)
  223. SDANY(ScaleRowDown4Box_Any_MMI,
  224. ScaleRowDown4Box_MMI,
  225. ScaleRowDown4Box_C,
  226. 4,
  227. 1,
  228. 7)
  229. #endif
  230. #ifdef HAS_SCALEROWDOWN34_SSSE3
  231. SDANY(ScaleRowDown34_Any_SSSE3,
  232. ScaleRowDown34_SSSE3,
  233. ScaleRowDown34_C,
  234. 4 / 3,
  235. 1,
  236. 23)
  237. SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
  238. ScaleRowDown34_0_Box_SSSE3,
  239. ScaleRowDown34_0_Box_C,
  240. 4 / 3,
  241. 1,
  242. 23)
  243. SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
  244. ScaleRowDown34_1_Box_SSSE3,
  245. ScaleRowDown34_1_Box_C,
  246. 4 / 3,
  247. 1,
  248. 23)
  249. #endif
  250. #ifdef HAS_SCALEROWDOWN34_NEON
  251. SDANY(ScaleRowDown34_Any_NEON,
  252. ScaleRowDown34_NEON,
  253. ScaleRowDown34_C,
  254. 4 / 3,
  255. 1,
  256. 23)
  257. SDANY(ScaleRowDown34_0_Box_Any_NEON,
  258. ScaleRowDown34_0_Box_NEON,
  259. ScaleRowDown34_0_Box_C,
  260. 4 / 3,
  261. 1,
  262. 23)
  263. SDANY(ScaleRowDown34_1_Box_Any_NEON,
  264. ScaleRowDown34_1_Box_NEON,
  265. ScaleRowDown34_1_Box_C,
  266. 4 / 3,
  267. 1,
  268. 23)
  269. #endif
  270. #ifdef HAS_SCALEROWDOWN34_MSA
  271. SDANY(ScaleRowDown34_Any_MSA,
  272. ScaleRowDown34_MSA,
  273. ScaleRowDown34_C,
  274. 4 / 3,
  275. 1,
  276. 47)
  277. SDANY(ScaleRowDown34_0_Box_Any_MSA,
  278. ScaleRowDown34_0_Box_MSA,
  279. ScaleRowDown34_0_Box_C,
  280. 4 / 3,
  281. 1,
  282. 47)
  283. SDANY(ScaleRowDown34_1_Box_Any_MSA,
  284. ScaleRowDown34_1_Box_MSA,
  285. ScaleRowDown34_1_Box_C,
  286. 4 / 3,
  287. 1,
  288. 47)
  289. #endif
  290. #ifdef HAS_SCALEROWDOWN34_MMI
  291. SDANY(ScaleRowDown34_Any_MMI,
  292. ScaleRowDown34_MMI,
  293. ScaleRowDown34_C,
  294. 4 / 3,
  295. 1,
  296. 23)
  297. #endif
  298. #ifdef HAS_SCALEROWDOWN38_SSSE3
  299. SDANY(ScaleRowDown38_Any_SSSE3,
  300. ScaleRowDown38_SSSE3,
  301. ScaleRowDown38_C,
  302. 8 / 3,
  303. 1,
  304. 11)
  305. SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
  306. ScaleRowDown38_3_Box_SSSE3,
  307. ScaleRowDown38_3_Box_C,
  308. 8 / 3,
  309. 1,
  310. 5)
  311. SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
  312. ScaleRowDown38_2_Box_SSSE3,
  313. ScaleRowDown38_2_Box_C,
  314. 8 / 3,
  315. 1,
  316. 5)
  317. #endif
  318. #ifdef HAS_SCALEROWDOWN38_NEON
  319. SDANY(ScaleRowDown38_Any_NEON,
  320. ScaleRowDown38_NEON,
  321. ScaleRowDown38_C,
  322. 8 / 3,
  323. 1,
  324. 11)
  325. SDANY(ScaleRowDown38_3_Box_Any_NEON,
  326. ScaleRowDown38_3_Box_NEON,
  327. ScaleRowDown38_3_Box_C,
  328. 8 / 3,
  329. 1,
  330. 11)
  331. SDANY(ScaleRowDown38_2_Box_Any_NEON,
  332. ScaleRowDown38_2_Box_NEON,
  333. ScaleRowDown38_2_Box_C,
  334. 8 / 3,
  335. 1,
  336. 11)
  337. #endif
  338. #ifdef HAS_SCALEROWDOWN38_MSA
  339. SDANY(ScaleRowDown38_Any_MSA,
  340. ScaleRowDown38_MSA,
  341. ScaleRowDown38_C,
  342. 8 / 3,
  343. 1,
  344. 11)
  345. SDANY(ScaleRowDown38_3_Box_Any_MSA,
  346. ScaleRowDown38_3_Box_MSA,
  347. ScaleRowDown38_3_Box_C,
  348. 8 / 3,
  349. 1,
  350. 11)
  351. SDANY(ScaleRowDown38_2_Box_Any_MSA,
  352. ScaleRowDown38_2_Box_MSA,
  353. ScaleRowDown38_2_Box_C,
  354. 8 / 3,
  355. 1,
  356. 11)
  357. #endif
  358. #ifdef HAS_SCALEARGBROWDOWN2_SSE2
  359. SDANY(ScaleARGBRowDown2_Any_SSE2,
  360. ScaleARGBRowDown2_SSE2,
  361. ScaleARGBRowDown2_C,
  362. 2,
  363. 4,
  364. 3)
  365. SDANY(ScaleARGBRowDown2Linear_Any_SSE2,
  366. ScaleARGBRowDown2Linear_SSE2,
  367. ScaleARGBRowDown2Linear_C,
  368. 2,
  369. 4,
  370. 3)
  371. SDANY(ScaleARGBRowDown2Box_Any_SSE2,
  372. ScaleARGBRowDown2Box_SSE2,
  373. ScaleARGBRowDown2Box_C,
  374. 2,
  375. 4,
  376. 3)
  377. #endif
  378. #ifdef HAS_SCALEARGBROWDOWN2_NEON
  379. SDANY(ScaleARGBRowDown2_Any_NEON,
  380. ScaleARGBRowDown2_NEON,
  381. ScaleARGBRowDown2_C,
  382. 2,
  383. 4,
  384. 7)
  385. SDANY(ScaleARGBRowDown2Linear_Any_NEON,
  386. ScaleARGBRowDown2Linear_NEON,
  387. ScaleARGBRowDown2Linear_C,
  388. 2,
  389. 4,
  390. 7)
  391. SDANY(ScaleARGBRowDown2Box_Any_NEON,
  392. ScaleARGBRowDown2Box_NEON,
  393. ScaleARGBRowDown2Box_C,
  394. 2,
  395. 4,
  396. 7)
  397. #endif
  398. #ifdef HAS_SCALEARGBROWDOWN2_MSA
  399. SDANY(ScaleARGBRowDown2_Any_MSA,
  400. ScaleARGBRowDown2_MSA,
  401. ScaleARGBRowDown2_C,
  402. 2,
  403. 4,
  404. 3)
  405. SDANY(ScaleARGBRowDown2Linear_Any_MSA,
  406. ScaleARGBRowDown2Linear_MSA,
  407. ScaleARGBRowDown2Linear_C,
  408. 2,
  409. 4,
  410. 3)
  411. SDANY(ScaleARGBRowDown2Box_Any_MSA,
  412. ScaleARGBRowDown2Box_MSA,
  413. ScaleARGBRowDown2Box_C,
  414. 2,
  415. 4,
  416. 3)
  417. #endif
  418. #ifdef HAS_SCALEARGBROWDOWN2_MMI
  419. SDANY(ScaleARGBRowDown2_Any_MMI,
  420. ScaleARGBRowDown2_MMI,
  421. ScaleARGBRowDown2_C,
  422. 2,
  423. 4,
  424. 1)
  425. SDANY(ScaleARGBRowDown2Linear_Any_MMI,
  426. ScaleARGBRowDown2Linear_MMI,
  427. ScaleARGBRowDown2Linear_C,
  428. 2,
  429. 4,
  430. 1)
  431. SDANY(ScaleARGBRowDown2Box_Any_MMI,
  432. ScaleARGBRowDown2Box_MMI,
  433. ScaleARGBRowDown2Box_C,
  434. 2,
  435. 4,
  436. 1)
  437. #endif
  438. #undef SDANY
  439. // Scale down by even scale factor.
  440. #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
  441. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
  442. uint8_t* dst_ptr, int dst_width) { \
  443. int r = dst_width & MASK; \
  444. int n = dst_width & ~MASK; \
  445. if (n > 0) { \
  446. SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
  447. } \
  448. SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
  449. dst_ptr + n * BPP, r); \
  450. }
  451. #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
  452. SDAANY(ScaleARGBRowDownEven_Any_SSE2,
  453. ScaleARGBRowDownEven_SSE2,
  454. ScaleARGBRowDownEven_C,
  455. 4,
  456. 3)
  457. SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2,
  458. ScaleARGBRowDownEvenBox_SSE2,
  459. ScaleARGBRowDownEvenBox_C,
  460. 4,
  461. 3)
  462. #endif
  463. #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
  464. SDAANY(ScaleARGBRowDownEven_Any_NEON,
  465. ScaleARGBRowDownEven_NEON,
  466. ScaleARGBRowDownEven_C,
  467. 4,
  468. 3)
  469. SDAANY(ScaleARGBRowDownEvenBox_Any_NEON,
  470. ScaleARGBRowDownEvenBox_NEON,
  471. ScaleARGBRowDownEvenBox_C,
  472. 4,
  473. 3)
  474. #endif
  475. #ifdef HAS_SCALEARGBROWDOWNEVEN_MSA
  476. SDAANY(ScaleARGBRowDownEven_Any_MSA,
  477. ScaleARGBRowDownEven_MSA,
  478. ScaleARGBRowDownEven_C,
  479. 4,
  480. 3)
  481. SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
  482. ScaleARGBRowDownEvenBox_MSA,
  483. ScaleARGBRowDownEvenBox_C,
  484. 4,
  485. 3)
  486. #endif
  487. #ifdef HAS_SCALEARGBROWDOWNEVEN_MMI
  488. SDAANY(ScaleARGBRowDownEven_Any_MMI,
  489. ScaleARGBRowDownEven_MMI,
  490. ScaleARGBRowDownEven_C,
  491. 4,
  492. 1)
  493. SDAANY(ScaleARGBRowDownEvenBox_Any_MMI,
  494. ScaleARGBRowDownEvenBox_MMI,
  495. ScaleARGBRowDownEvenBox_C,
  496. 4,
  497. 1)
  498. #endif
  499. #ifdef SASIMDONLY
  500. // This also works and uses memcpy and SIMD instead of C, but is slower on ARM
  501. // Add rows box filter scale down. Using macro from row_any
  502. #define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
  503. void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \
  504. SIMD_ALIGNED(uint16_t dst_temp[32]); \
  505. SIMD_ALIGNED(uint8_t src_temp[32]); \
  506. memset(dst_temp, 0, 32 * 2); /* for msan */ \
  507. int r = width & MASK; \
  508. int n = width & ~MASK; \
  509. if (n > 0) { \
  510. ANY_SIMD(src_ptr, dst_ptr, n); \
  511. } \
  512. memcpy(src_temp, src_ptr + n * SBPP, r * SBPP); \
  513. memcpy(dst_temp, dst_ptr + n * BPP, r * BPP); \
  514. ANY_SIMD(src_temp, dst_temp, MASK + 1); \
  515. memcpy(dst_ptr + n * BPP, dst_temp, r * BPP); \
  516. }
  517. #ifdef HAS_SCALEADDROW_SSE2
  518. SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15)
  519. #endif
  520. #ifdef HAS_SCALEADDROW_AVX2
  521. SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31)
  522. #endif
  523. #ifdef HAS_SCALEADDROW_NEON
  524. SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15)
  525. #endif
  526. #ifdef HAS_SCALEADDROW_MSA
  527. SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15)
  528. #endif
  529. #ifdef HAS_SCALEADDROW_MMI
  530. SAROW(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, 1, 2, 7)
  531. #endif
  532. #undef SAANY
  533. #else
  534. // Add rows box filter scale down.
  535. #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
  536. void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
  537. int n = src_width & ~MASK; \
  538. if (n > 0) { \
  539. SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
  540. } \
  541. SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
  542. }
  543. #ifdef HAS_SCALEADDROW_SSE2
  544. SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
  545. #endif
  546. #ifdef HAS_SCALEADDROW_AVX2
  547. SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
  548. #endif
  549. #ifdef HAS_SCALEADDROW_NEON
  550. SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
  551. #endif
  552. #ifdef HAS_SCALEADDROW_MSA
  553. SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
  554. #endif
  555. #ifdef HAS_SCALEADDROW_MMI
  556. SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7)
  557. #endif
  558. #undef SAANY
  559. #endif // SASIMDONLY
  560. #ifdef __cplusplus
  561. } // extern "C"
  562. } // namespace libyuv
  563. #endif