scale_argb_test.cc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <stdlib.h>
  11. #include <time.h>
  12. #include "../unit_test/unit_test.h"
  13. #include "libyuv/convert_argb.h"
  14. #include "libyuv/cpu_id.h"
  15. #include "libyuv/scale_argb.h"
  16. #include "libyuv/video_common.h"
  17. namespace libyuv {
  // Builds a "file:line" string literal at compile time. The stringification
  // goes through a second macro so that an argument such as __LINE__ is
  // expanded to its value before '#' is applied to it.
  #define STRINGIZE(token) #token
  #define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
  20. // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
  21. static int ARGBTestFilter(int src_width,
  22. int src_height,
  23. int dst_width,
  24. int dst_height,
  25. FilterMode f,
  26. int benchmark_iterations,
  27. int disable_cpu_flags,
  28. int benchmark_cpu_info) {
  29. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  30. return 0;
  31. }
  32. int i, j;
  33. const int b = 0; // 128 to test for padding/stride.
  34. int64_t src_argb_plane_size =
  35. (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL;
  36. int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
  37. align_buffer_page_end(src_argb, src_argb_plane_size);
  38. if (!src_argb) {
  39. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  40. return 0;
  41. }
  42. MemRandomize(src_argb, src_argb_plane_size);
  43. int64_t dst_argb_plane_size =
  44. (dst_width + b * 2) * (dst_height + b * 2) * 4LL;
  45. int dst_stride_argb = (b * 2 + dst_width) * 4;
  46. align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
  47. align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
  48. if (!dst_argb_c || !dst_argb_opt) {
  49. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  50. return 0;
  51. }
  52. memset(dst_argb_c, 2, dst_argb_plane_size);
  53. memset(dst_argb_opt, 3, dst_argb_plane_size);
  54. // Warm up both versions for consistent benchmarks.
  55. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  56. ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
  57. src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
  58. dst_stride_argb, dst_width, dst_height, f);
  59. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  60. ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
  61. src_width, src_height, dst_argb_opt + (dst_stride_argb * b) + b * 4,
  62. dst_stride_argb, dst_width, dst_height, f);
  63. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  64. double c_time = get_time();
  65. ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
  66. src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
  67. dst_stride_argb, dst_width, dst_height, f);
  68. c_time = (get_time() - c_time);
  69. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  70. double opt_time = get_time();
  71. for (i = 0; i < benchmark_iterations; ++i) {
  72. ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
  73. src_width, src_height,
  74. dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
  75. dst_width, dst_height, f);
  76. }
  77. opt_time = (get_time() - opt_time) / benchmark_iterations;
  78. // Report performance of C vs OPT
  79. printf("filter %d - %8d us C - %8d us OPT\n", f,
  80. static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
  81. // C version may be a little off from the optimized. Order of
  82. // operations may introduce rounding somewhere. So do a difference
  83. // of the buffers and look to see that the max difference isn't
  84. // over 2.
  85. int max_diff = 0;
  86. for (i = b; i < (dst_height + b); ++i) {
  87. for (j = b * 4; j < (dst_width + b) * 4; ++j) {
  88. int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
  89. dst_argb_opt[(i * dst_stride_argb) + j]);
  90. if (abs_diff > max_diff) {
  91. max_diff = abs_diff;
  92. }
  93. }
  94. }
  95. free_aligned_buffer_page_end(dst_argb_c);
  96. free_aligned_buffer_page_end(dst_argb_opt);
  97. free_aligned_buffer_page_end(src_argb);
  98. return max_diff;
  99. }
  100. static const int kTileX = 8;
  101. static const int kTileY = 8;
  102. static int TileARGBScale(const uint8_t* src_argb,
  103. int src_stride_argb,
  104. int src_width,
  105. int src_height,
  106. uint8_t* dst_argb,
  107. int dst_stride_argb,
  108. int dst_width,
  109. int dst_height,
  110. FilterMode filtering) {
  111. for (int y = 0; y < dst_height; y += kTileY) {
  112. for (int x = 0; x < dst_width; x += kTileX) {
  113. int clip_width = kTileX;
  114. if (x + clip_width > dst_width) {
  115. clip_width = dst_width - x;
  116. }
  117. int clip_height = kTileY;
  118. if (y + clip_height > dst_height) {
  119. clip_height = dst_height - y;
  120. }
  121. int r = ARGBScaleClip(src_argb, src_stride_argb, src_width, src_height,
  122. dst_argb, dst_stride_argb, dst_width, dst_height, x,
  123. y, clip_width, clip_height, filtering);
  124. if (r) {
  125. return r;
  126. }
  127. }
  128. }
  129. return 0;
  130. }
  131. static int ARGBClipTestFilter(int src_width,
  132. int src_height,
  133. int dst_width,
  134. int dst_height,
  135. FilterMode f,
  136. int benchmark_iterations) {
  137. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  138. return 0;
  139. }
  140. const int b = 128;
  141. int64_t src_argb_plane_size =
  142. (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4;
  143. int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
  144. align_buffer_page_end(src_argb, src_argb_plane_size);
  145. if (!src_argb) {
  146. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  147. return 0;
  148. }
  149. memset(src_argb, 1, src_argb_plane_size);
  150. int64_t dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
  151. int dst_stride_argb = (b * 2 + dst_width) * 4;
  152. int i, j;
  153. for (i = b; i < (Abs(src_height) + b); ++i) {
  154. for (j = b; j < (Abs(src_width) + b) * 4; ++j) {
  155. src_argb[(i * src_stride_argb) + j] = (fastrand() & 0xff);
  156. }
  157. }
  158. align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
  159. align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
  160. if (!dst_argb_c || !dst_argb_opt) {
  161. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  162. return 0;
  163. }
  164. memset(dst_argb_c, 2, dst_argb_plane_size);
  165. memset(dst_argb_opt, 3, dst_argb_plane_size);
  166. // Do full image, no clipping.
  167. double c_time = get_time();
  168. ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
  169. src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
  170. dst_stride_argb, dst_width, dst_height, f);
  171. c_time = (get_time() - c_time);
  172. // Do tiled image, clipping scale to a tile at a time.
  173. double opt_time = get_time();
  174. for (i = 0; i < benchmark_iterations; ++i) {
  175. TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
  176. src_width, src_height,
  177. dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
  178. dst_width, dst_height, f);
  179. }
  180. opt_time = (get_time() - opt_time) / benchmark_iterations;
  181. // Report performance of Full vs Tiled.
  182. printf("filter %d - %8d us Full - %8d us Tiled\n", f,
  183. static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
  184. // Compare full scaled image vs tiled image.
  185. int max_diff = 0;
  186. for (i = b; i < (dst_height + b); ++i) {
  187. for (j = b * 4; j < (dst_width + b) * 4; ++j) {
  188. int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
  189. dst_argb_opt[(i * dst_stride_argb) + j]);
  190. if (abs_diff > max_diff) {
  191. max_diff = abs_diff;
  192. }
  193. }
  194. }
  195. free_aligned_buffer_page_end(dst_argb_c);
  196. free_aligned_buffer_page_end(dst_argb_opt);
  197. free_aligned_buffer_page_end(src_argb);
  198. return max_diff;
  199. }
  200. // The following adjustments in dimensions ensure the scale factor will be
  201. // exactly achieved.
  202. #define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
  203. #define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
  204. #define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
  205. TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) { \
  206. int diff = ARGBTestFilter( \
  207. SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
  208. DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
  209. kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
  210. benchmark_cpu_info_); \
  211. EXPECT_LE(diff, max_diff); \
  212. } \
  213. TEST_F(LibYUVScaleTest, ARGBScaleDownClipBy##name##_##filter) { \
  214. int diff = ARGBClipTestFilter( \
  215. SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
  216. DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
  217. kFilter##filter, benchmark_iterations_); \
  218. EXPECT_LE(diff, max_diff); \
  219. }
  220. // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
  221. // filtering is different fixed point implementations for SSSE3, Neon and C.
  222. #define TEST_FACTOR(name, nom, denom) \
  223. TEST_FACTOR1(name, None, nom, denom, 0) \
  224. TEST_FACTOR1(name, Linear, nom, denom, 3) \
  225. TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
  226. TEST_FACTOR1(name, Box, nom, denom, 3)
  227. TEST_FACTOR(2, 1, 2)
  228. TEST_FACTOR(4, 1, 4)
  229. // TEST_FACTOR(8, 1, 8) Disable for benchmark performance.
  230. TEST_FACTOR(3by4, 3, 4)
  231. TEST_FACTOR(3by8, 3, 8)
  232. TEST_FACTOR(3, 1, 3)
  233. #undef TEST_FACTOR1
  234. #undef TEST_FACTOR
  235. #undef SX
  236. #undef DX
  237. #define TEST_SCALETO1(name, width, height, filter, max_diff) \
  238. TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
  239. int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width, \
  240. height, kFilter##filter, benchmark_iterations_, \
  241. disable_cpu_flags_, benchmark_cpu_info_); \
  242. EXPECT_LE(diff, max_diff); \
  243. } \
  244. TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
  245. int diff = ARGBTestFilter(width, height, Abs(benchmark_width_), \
  246. Abs(benchmark_height_), kFilter##filter, \
  247. benchmark_iterations_, disable_cpu_flags_, \
  248. benchmark_cpu_info_); \
  249. EXPECT_LE(diff, max_diff); \
  250. } \
  251. TEST_F(LibYUVScaleTest, name##ClipTo##width##x##height##_##filter) { \
  252. int diff = \
  253. ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \
  254. kFilter##filter, benchmark_iterations_); \
  255. EXPECT_LE(diff, max_diff); \
  256. } \
  257. TEST_F(LibYUVScaleTest, name##ClipFrom##width##x##height##_##filter) { \
  258. int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_), \
  259. Abs(benchmark_height_), kFilter##filter, \
  260. benchmark_iterations_); \
  261. EXPECT_LE(diff, max_diff); \
  262. }
  263. /// Test scale to a specified size with all 4 filters.
  264. #define TEST_SCALETO(name, width, height) \
  265. TEST_SCALETO1(name, width, height, None, 0) \
  266. TEST_SCALETO1(name, width, height, Linear, 3) \
  267. TEST_SCALETO1(name, width, height, Bilinear, 3)
  268. TEST_SCALETO(ARGBScale, 1, 1)
  269. TEST_SCALETO(ARGBScale, 320, 240)
  270. TEST_SCALETO(ARGBScale, 569, 480)
  271. TEST_SCALETO(ARGBScale, 640, 360)
  272. #ifdef ENABLE_SLOW_TESTS
  273. TEST_SCALETO(ARGBScale, 1280, 720)
  274. TEST_SCALETO(ARGBScale, 1920, 1080)
  275. #endif // ENABLE_SLOW_TESTS
  276. #undef TEST_SCALETO1
  277. #undef TEST_SCALETO
  278. // Scale with YUV conversion to ARGB and clipping.
  279. // TODO(fbarchard): Add fourcc support. All 4 ARGB formats is easy to support.
  280. LIBYUV_API
  281. int YUVToARGBScaleReference2(const uint8_t* src_y,
  282. int src_stride_y,
  283. const uint8_t* src_u,
  284. int src_stride_u,
  285. const uint8_t* src_v,
  286. int src_stride_v,
  287. uint32_t /* src_fourcc */,
  288. int src_width,
  289. int src_height,
  290. uint8_t* dst_argb,
  291. int dst_stride_argb,
  292. uint32_t /* dst_fourcc */,
  293. int dst_width,
  294. int dst_height,
  295. int clip_x,
  296. int clip_y,
  297. int clip_width,
  298. int clip_height,
  299. enum FilterMode filtering) {
  300. uint8_t* argb_buffer =
  301. static_cast<uint8_t*>(malloc(src_width * src_height * 4));
  302. int r;
  303. I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
  304. argb_buffer, src_width * 4, src_width, src_height);
  305. r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
  306. dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
  307. clip_width, clip_height, filtering);
  308. free(argb_buffer);
  309. return r;
  310. }
  // Fills a tightly packed width x height byte image with a ramp.
  //   v  - value written to the first byte.
  //   dx - amount added to the value after each byte within a row.
  //   dy - amount added to the row's starting value after each row.
  // Whenever a step would take the value outside 0..255, the sign of that
  // step is flipped and the flipped step is applied once. A flipped dx stays
  // flipped for the following rows.
  static void FillRamp(uint8_t* buf,
                       int width,
                       int height,
                       int v,
                       int dx,
                       int dy) {
    uint8_t* out = buf;
    int row_start = v;
    int step_x = dx;
    int step_y = dy;
    for (int row = 0; row < height; ++row) {
      int value = row_start;
      for (int col = 0; col < width; ++col) {
        *out++ = value;
        value += step_x;
        if (value < 0 || value > 255) {
          step_x = -step_x;
          value += step_x;
        }
      }
      row_start += step_y;
      if (row_start < 0 || row_start > 255) {
        step_y = -step_y;
        row_start += step_y;
      }
    }
  }
  335. // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
  336. static int YUVToARGBTestFilter(int src_width,
  337. int src_height,
  338. int dst_width,
  339. int dst_height,
  340. FilterMode f,
  341. int benchmark_iterations) {
  342. int64_t src_y_plane_size = Abs(src_width) * Abs(src_height);
  343. int64_t src_uv_plane_size =
  344. ((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2);
  345. int src_stride_y = Abs(src_width);
  346. int src_stride_uv = (Abs(src_width) + 1) / 2;
  347. align_buffer_page_end(src_y, src_y_plane_size);
  348. align_buffer_page_end(src_u, src_uv_plane_size);
  349. align_buffer_page_end(src_v, src_uv_plane_size);
  350. int64_t dst_argb_plane_size = (dst_width) * (dst_height)*4LL;
  351. int dst_stride_argb = (dst_width)*4;
  352. align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
  353. align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
  354. if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) {
  355. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  356. return 0;
  357. }
  358. // Fill YUV image with continuous ramp, which is less sensitive to
  359. // subsampling and filtering differences for test purposes.
  360. FillRamp(src_y, Abs(src_width), Abs(src_height), 128, 1, 1);
  361. FillRamp(src_u, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 3, 1, 1);
  362. FillRamp(src_v, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 4, 1, 1);
  363. memset(dst_argb_c, 2, dst_argb_plane_size);
  364. memset(dst_argb_opt, 3, dst_argb_plane_size);
  365. YUVToARGBScaleReference2(src_y, src_stride_y, src_u, src_stride_uv, src_v,
  366. src_stride_uv, libyuv::FOURCC_I420, src_width,
  367. src_height, dst_argb_c, dst_stride_argb,
  368. libyuv::FOURCC_I420, dst_width, dst_height, 0, 0,
  369. dst_width, dst_height, f);
  370. for (int i = 0; i < benchmark_iterations; ++i) {
  371. YUVToARGBScaleClip(src_y, src_stride_y, src_u, src_stride_uv, src_v,
  372. src_stride_uv, libyuv::FOURCC_I420, src_width,
  373. src_height, dst_argb_opt, dst_stride_argb,
  374. libyuv::FOURCC_I420, dst_width, dst_height, 0, 0,
  375. dst_width, dst_height, f);
  376. }
  377. int max_diff = 0;
  378. for (int i = 0; i < dst_height; ++i) {
  379. for (int j = 0; j < dst_width * 4; ++j) {
  380. int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
  381. dst_argb_opt[(i * dst_stride_argb) + j]);
  382. if (abs_diff > max_diff) {
  383. printf("error %d at %d,%d c %d opt %d", abs_diff, j, i,
  384. dst_argb_c[(i * dst_stride_argb) + j],
  385. dst_argb_opt[(i * dst_stride_argb) + j]);
  386. EXPECT_LE(abs_diff, 40);
  387. max_diff = abs_diff;
  388. }
  389. }
  390. }
  391. free_aligned_buffer_page_end(dst_argb_c);
  392. free_aligned_buffer_page_end(dst_argb_opt);
  393. free_aligned_buffer_page_end(src_y);
  394. free_aligned_buffer_page_end(src_u);
  395. free_aligned_buffer_page_end(src_v);
  396. return max_diff;
  397. }
  398. TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) {
  399. int diff =
  400. YUVToARGBTestFilter(benchmark_width_, benchmark_height_,
  401. benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2,
  402. libyuv::kFilterBilinear, benchmark_iterations_);
  403. EXPECT_LE(diff, 10);
  404. }
  405. TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
  406. int diff = YUVToARGBTestFilter(
  407. benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, benchmark_width_,
  408. benchmark_height_, libyuv::kFilterBilinear, benchmark_iterations_);
  409. EXPECT_LE(diff, 10);
  410. }
  411. } // namespace libyuv