scale_test.cc 37 KB


  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <stdlib.h>
  11. #include <time.h>
  12. #include "../unit_test/unit_test.h"
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/scale.h"
  15. #ifdef ENABLE_ROW_TESTS
  16. #include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
  17. #endif
  18. #define STRINGIZE(line) #line
  19. #define FILELINESTR(file, line) file ":" STRINGIZE(line)
  20. namespace libyuv {
  21. // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
  22. static int I420TestFilter(int src_width,
  23. int src_height,
  24. int dst_width,
  25. int dst_height,
  26. FilterMode f,
  27. int benchmark_iterations,
  28. int disable_cpu_flags,
  29. int benchmark_cpu_info) {
  30. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  31. return 0;
  32. }
  33. int i, j;
  34. int src_width_uv = (Abs(src_width) + 1) >> 1;
  35. int src_height_uv = (Abs(src_height) + 1) >> 1;
  36. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  37. int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
  38. int src_stride_y = Abs(src_width);
  39. int src_stride_uv = src_width_uv;
  40. align_buffer_page_end(src_y, src_y_plane_size);
  41. align_buffer_page_end(src_u, src_uv_plane_size);
  42. align_buffer_page_end(src_v, src_uv_plane_size);
  43. if (!src_y || !src_u || !src_v) {
  44. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  45. return 0;
  46. }
  47. MemRandomize(src_y, src_y_plane_size);
  48. MemRandomize(src_u, src_uv_plane_size);
  49. MemRandomize(src_v, src_uv_plane_size);
  50. int dst_width_uv = (dst_width + 1) >> 1;
  51. int dst_height_uv = (dst_height + 1) >> 1;
  52. int64_t dst_y_plane_size = (dst_width) * (dst_height);
  53. int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
  54. int dst_stride_y = dst_width;
  55. int dst_stride_uv = dst_width_uv;
  56. align_buffer_page_end(dst_y_c, dst_y_plane_size);
  57. align_buffer_page_end(dst_u_c, dst_uv_plane_size);
  58. align_buffer_page_end(dst_v_c, dst_uv_plane_size);
  59. align_buffer_page_end(dst_y_opt, dst_y_plane_size);
  60. align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
  61. align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
  62. if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
  63. !dst_v_opt) {
  64. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  65. return 0;
  66. }
  67. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  68. double c_time = get_time();
  69. I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  70. src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
  71. dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
  72. c_time = (get_time() - c_time);
  73. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  74. double opt_time = get_time();
  75. for (i = 0; i < benchmark_iterations; ++i) {
  76. I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  77. src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
  78. dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
  79. f);
  80. }
  81. opt_time = (get_time() - opt_time) / benchmark_iterations;
  82. // Report performance of C vs OPT.
  83. printf("filter %d - %8d us C - %8d us OPT\n", f,
  84. static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
  85. // C version may be a little off from the optimized. Order of
  86. // operations may introduce rounding somewhere. So do a difference
  87. // of the buffers and look to see that the max difference is not
  88. // over 3.
  89. int max_diff = 0;
  90. for (i = 0; i < (dst_height); ++i) {
  91. for (j = 0; j < (dst_width); ++j) {
  92. int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
  93. dst_y_opt[(i * dst_stride_y) + j]);
  94. if (abs_diff > max_diff) {
  95. max_diff = abs_diff;
  96. }
  97. }
  98. }
  99. for (i = 0; i < (dst_height_uv); ++i) {
  100. for (j = 0; j < (dst_width_uv); ++j) {
  101. int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
  102. dst_u_opt[(i * dst_stride_uv) + j]);
  103. if (abs_diff > max_diff) {
  104. max_diff = abs_diff;
  105. }
  106. abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
  107. dst_v_opt[(i * dst_stride_uv) + j]);
  108. if (abs_diff > max_diff) {
  109. max_diff = abs_diff;
  110. }
  111. }
  112. }
  113. free_aligned_buffer_page_end(dst_y_c);
  114. free_aligned_buffer_page_end(dst_u_c);
  115. free_aligned_buffer_page_end(dst_v_c);
  116. free_aligned_buffer_page_end(dst_y_opt);
  117. free_aligned_buffer_page_end(dst_u_opt);
  118. free_aligned_buffer_page_end(dst_v_opt);
  119. free_aligned_buffer_page_end(src_y);
  120. free_aligned_buffer_page_end(src_u);
  121. free_aligned_buffer_page_end(src_v);
  122. return max_diff;
  123. }
  124. // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
  125. // 0 = exact.
  126. static int I420TestFilter_16(int src_width,
  127. int src_height,
  128. int dst_width,
  129. int dst_height,
  130. FilterMode f,
  131. int benchmark_iterations,
  132. int disable_cpu_flags,
  133. int benchmark_cpu_info) {
  134. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  135. return 0;
  136. }
  137. int i;
  138. int src_width_uv = (Abs(src_width) + 1) >> 1;
  139. int src_height_uv = (Abs(src_height) + 1) >> 1;
  140. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  141. int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
  142. int src_stride_y = Abs(src_width);
  143. int src_stride_uv = src_width_uv;
  144. align_buffer_page_end(src_y, src_y_plane_size);
  145. align_buffer_page_end(src_u, src_uv_plane_size);
  146. align_buffer_page_end(src_v, src_uv_plane_size);
  147. align_buffer_page_end(src_y_16, src_y_plane_size * 2);
  148. align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
  149. align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
  150. if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
  151. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  152. return 0;
  153. }
  154. uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
  155. uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
  156. uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
  157. MemRandomize(src_y, src_y_plane_size);
  158. MemRandomize(src_u, src_uv_plane_size);
  159. MemRandomize(src_v, src_uv_plane_size);
  160. for (i = 0; i < src_y_plane_size; ++i) {
  161. p_src_y_16[i] = src_y[i];
  162. }
  163. for (i = 0; i < src_uv_plane_size; ++i) {
  164. p_src_u_16[i] = src_u[i];
  165. p_src_v_16[i] = src_v[i];
  166. }
  167. int dst_width_uv = (dst_width + 1) >> 1;
  168. int dst_height_uv = (dst_height + 1) >> 1;
  169. int dst_y_plane_size = (dst_width) * (dst_height);
  170. int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
  171. int dst_stride_y = dst_width;
  172. int dst_stride_uv = dst_width_uv;
  173. align_buffer_page_end(dst_y_8, dst_y_plane_size);
  174. align_buffer_page_end(dst_u_8, dst_uv_plane_size);
  175. align_buffer_page_end(dst_v_8, dst_uv_plane_size);
  176. align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
  177. align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
  178. align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
  179. uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
  180. uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
  181. uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
  182. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  183. I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  184. src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
  185. dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
  186. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  187. for (i = 0; i < benchmark_iterations; ++i) {
  188. I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
  189. p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
  190. dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
  191. dst_stride_uv, dst_width, dst_height, f);
  192. }
  193. // Expect an exact match.
  194. int max_diff = 0;
  195. for (i = 0; i < dst_y_plane_size; ++i) {
  196. int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
  197. if (abs_diff > max_diff) {
  198. max_diff = abs_diff;
  199. }
  200. }
  201. for (i = 0; i < dst_uv_plane_size; ++i) {
  202. int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
  203. if (abs_diff > max_diff) {
  204. max_diff = abs_diff;
  205. }
  206. abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
  207. if (abs_diff > max_diff) {
  208. max_diff = abs_diff;
  209. }
  210. }
  211. free_aligned_buffer_page_end(dst_y_8);
  212. free_aligned_buffer_page_end(dst_u_8);
  213. free_aligned_buffer_page_end(dst_v_8);
  214. free_aligned_buffer_page_end(dst_y_16);
  215. free_aligned_buffer_page_end(dst_u_16);
  216. free_aligned_buffer_page_end(dst_v_16);
  217. free_aligned_buffer_page_end(src_y);
  218. free_aligned_buffer_page_end(src_u);
  219. free_aligned_buffer_page_end(src_v);
  220. free_aligned_buffer_page_end(src_y_16);
  221. free_aligned_buffer_page_end(src_u_16);
  222. free_aligned_buffer_page_end(src_v_16);
  223. return max_diff;
  224. }
  225. // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
  226. static int I444TestFilter(int src_width,
  227. int src_height,
  228. int dst_width,
  229. int dst_height,
  230. FilterMode f,
  231. int benchmark_iterations,
  232. int disable_cpu_flags,
  233. int benchmark_cpu_info) {
  234. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  235. return 0;
  236. }
  237. int i, j;
  238. int src_width_uv = Abs(src_width);
  239. int src_height_uv = Abs(src_height);
  240. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  241. int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
  242. int src_stride_y = Abs(src_width);
  243. int src_stride_uv = src_width_uv;
  244. align_buffer_page_end(src_y, src_y_plane_size);
  245. align_buffer_page_end(src_u, src_uv_plane_size);
  246. align_buffer_page_end(src_v, src_uv_plane_size);
  247. if (!src_y || !src_u || !src_v) {
  248. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  249. return 0;
  250. }
  251. MemRandomize(src_y, src_y_plane_size);
  252. MemRandomize(src_u, src_uv_plane_size);
  253. MemRandomize(src_v, src_uv_plane_size);
  254. int dst_width_uv = dst_width;
  255. int dst_height_uv = dst_height;
  256. int64_t dst_y_plane_size = (dst_width) * (dst_height);
  257. int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
  258. int dst_stride_y = dst_width;
  259. int dst_stride_uv = dst_width_uv;
  260. align_buffer_page_end(dst_y_c, dst_y_plane_size);
  261. align_buffer_page_end(dst_u_c, dst_uv_plane_size);
  262. align_buffer_page_end(dst_v_c, dst_uv_plane_size);
  263. align_buffer_page_end(dst_y_opt, dst_y_plane_size);
  264. align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
  265. align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
  266. if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
  267. !dst_v_opt) {
  268. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  269. return 0;
  270. }
  271. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  272. double c_time = get_time();
  273. I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  274. src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
  275. dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
  276. c_time = (get_time() - c_time);
  277. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  278. double opt_time = get_time();
  279. for (i = 0; i < benchmark_iterations; ++i) {
  280. I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  281. src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
  282. dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
  283. f);
  284. }
  285. opt_time = (get_time() - opt_time) / benchmark_iterations;
  286. // Report performance of C vs OPT.
  287. printf("filter %d - %8d us C - %8d us OPT\n", f,
  288. static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
  289. // C version may be a little off from the optimized. Order of
  290. // operations may introduce rounding somewhere. So do a difference
  291. // of the buffers and look to see that the max difference is not
  292. // over 3.
  293. int max_diff = 0;
  294. for (i = 0; i < (dst_height); ++i) {
  295. for (j = 0; j < (dst_width); ++j) {
  296. int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
  297. dst_y_opt[(i * dst_stride_y) + j]);
  298. if (abs_diff > max_diff) {
  299. max_diff = abs_diff;
  300. }
  301. }
  302. }
  303. for (i = 0; i < (dst_height_uv); ++i) {
  304. for (j = 0; j < (dst_width_uv); ++j) {
  305. int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
  306. dst_u_opt[(i * dst_stride_uv) + j]);
  307. if (abs_diff > max_diff) {
  308. max_diff = abs_diff;
  309. }
  310. abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
  311. dst_v_opt[(i * dst_stride_uv) + j]);
  312. if (abs_diff > max_diff) {
  313. max_diff = abs_diff;
  314. }
  315. }
  316. }
  317. free_aligned_buffer_page_end(dst_y_c);
  318. free_aligned_buffer_page_end(dst_u_c);
  319. free_aligned_buffer_page_end(dst_v_c);
  320. free_aligned_buffer_page_end(dst_y_opt);
  321. free_aligned_buffer_page_end(dst_u_opt);
  322. free_aligned_buffer_page_end(dst_v_opt);
  323. free_aligned_buffer_page_end(src_y);
  324. free_aligned_buffer_page_end(src_u);
  325. free_aligned_buffer_page_end(src_v);
  326. return max_diff;
  327. }
  328. // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
  329. // 0 = exact.
  330. static int I444TestFilter_16(int src_width,
  331. int src_height,
  332. int dst_width,
  333. int dst_height,
  334. FilterMode f,
  335. int benchmark_iterations,
  336. int disable_cpu_flags,
  337. int benchmark_cpu_info) {
  338. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  339. return 0;
  340. }
  341. int i;
  342. int src_width_uv = Abs(src_width);
  343. int src_height_uv = Abs(src_height);
  344. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  345. int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
  346. int src_stride_y = Abs(src_width);
  347. int src_stride_uv = src_width_uv;
  348. align_buffer_page_end(src_y, src_y_plane_size);
  349. align_buffer_page_end(src_u, src_uv_plane_size);
  350. align_buffer_page_end(src_v, src_uv_plane_size);
  351. align_buffer_page_end(src_y_16, src_y_plane_size * 2);
  352. align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
  353. align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
  354. if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
  355. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  356. return 0;
  357. }
  358. uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
  359. uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
  360. uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
  361. MemRandomize(src_y, src_y_plane_size);
  362. MemRandomize(src_u, src_uv_plane_size);
  363. MemRandomize(src_v, src_uv_plane_size);
  364. for (i = 0; i < src_y_plane_size; ++i) {
  365. p_src_y_16[i] = src_y[i];
  366. }
  367. for (i = 0; i < src_uv_plane_size; ++i) {
  368. p_src_u_16[i] = src_u[i];
  369. p_src_v_16[i] = src_v[i];
  370. }
  371. int dst_width_uv = dst_width;
  372. int dst_height_uv = dst_height;
  373. int dst_y_plane_size = (dst_width) * (dst_height);
  374. int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
  375. int dst_stride_y = dst_width;
  376. int dst_stride_uv = dst_width_uv;
  377. align_buffer_page_end(dst_y_8, dst_y_plane_size);
  378. align_buffer_page_end(dst_u_8, dst_uv_plane_size);
  379. align_buffer_page_end(dst_v_8, dst_uv_plane_size);
  380. align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
  381. align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
  382. align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
  383. uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
  384. uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
  385. uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
  386. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  387. I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  388. src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
  389. dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
  390. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  391. for (i = 0; i < benchmark_iterations; ++i) {
  392. I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
  393. p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
  394. dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
  395. dst_stride_uv, dst_width, dst_height, f);
  396. }
  397. // Expect an exact match.
  398. int max_diff = 0;
  399. for (i = 0; i < dst_y_plane_size; ++i) {
  400. int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
  401. if (abs_diff > max_diff) {
  402. max_diff = abs_diff;
  403. }
  404. }
  405. for (i = 0; i < dst_uv_plane_size; ++i) {
  406. int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
  407. if (abs_diff > max_diff) {
  408. max_diff = abs_diff;
  409. }
  410. abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
  411. if (abs_diff > max_diff) {
  412. max_diff = abs_diff;
  413. }
  414. }
  415. free_aligned_buffer_page_end(dst_y_8);
  416. free_aligned_buffer_page_end(dst_u_8);
  417. free_aligned_buffer_page_end(dst_v_8);
  418. free_aligned_buffer_page_end(dst_y_16);
  419. free_aligned_buffer_page_end(dst_u_16);
  420. free_aligned_buffer_page_end(dst_v_16);
  421. free_aligned_buffer_page_end(src_y);
  422. free_aligned_buffer_page_end(src_u);
  423. free_aligned_buffer_page_end(src_v);
  424. free_aligned_buffer_page_end(src_y_16);
  425. free_aligned_buffer_page_end(src_u_16);
  426. free_aligned_buffer_page_end(src_v_16);
  427. return max_diff;
  428. }
  429. // The following adjustments in dimensions ensure the scale factor will be
  430. // exactly achieved.
  431. // 2 is chroma subsample.
  432. #define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
  433. #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
  434. #define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
  435. TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
  436. int diff = I420TestFilter( \
  437. SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
  438. DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
  439. kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
  440. benchmark_cpu_info_); \
  441. EXPECT_LE(diff, max_diff); \
  442. } \
  443. TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
  444. int diff = I444TestFilter( \
  445. SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
  446. DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
  447. kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
  448. benchmark_cpu_info_); \
  449. EXPECT_LE(diff, max_diff); \
  450. } \
  451. TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_16) { \
  452. int diff = I420TestFilter_16( \
  453. SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
  454. DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
  455. kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
  456. benchmark_cpu_info_); \
  457. EXPECT_LE(diff, max_diff); \
  458. } \
  459. TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_16) { \
  460. int diff = I444TestFilter_16( \
  461. SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
  462. DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
  463. kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
  464. benchmark_cpu_info_); \
  465. EXPECT_LE(diff, max_diff); \
  466. }
  467. // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
  468. // filtering is different fixed point implementations for SSSE3, Neon and C.
  469. #ifdef ENABLE_SLOW_TESTS
  470. #define TEST_FACTOR(name, nom, denom, boxdiff) \
  471. TEST_FACTOR1(, name, None, nom, denom, 0) \
  472. TEST_FACTOR1(, name, Linear, nom, denom, 3) \
  473. TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
  474. TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
  475. #else
  476. #define TEST_FACTOR(name, nom, denom, boxdiff) \
  477. TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
  478. TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
  479. TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
  480. TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
  481. #endif
  482. TEST_FACTOR(2, 1, 2, 0)
  483. TEST_FACTOR(4, 1, 4, 0)
  484. // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
  485. TEST_FACTOR(3by4, 3, 4, 1)
  486. TEST_FACTOR(3by8, 3, 8, 1)
  487. TEST_FACTOR(3, 1, 3, 0)
  488. #undef TEST_FACTOR1
  489. #undef TEST_FACTOR
  490. #undef SX
  491. #undef DX
  492. #define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \
  493. TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
  494. int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
  495. height, kFilter##filter, benchmark_iterations_, \
  496. disable_cpu_flags_, benchmark_cpu_info_); \
  497. EXPECT_LE(diff, max_diff); \
  498. } \
  499. TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
  500. int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
  501. height, kFilter##filter, benchmark_iterations_, \
  502. disable_cpu_flags_, benchmark_cpu_info_); \
  503. EXPECT_LE(diff, max_diff); \
  504. } \
  505. TEST_F(LibYUVScaleTest, \
  506. DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
  507. int diff = I420TestFilter_16( \
  508. benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
  509. benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
  510. EXPECT_LE(diff, max_diff); \
  511. } \
  512. TEST_F(LibYUVScaleTest, \
  513. DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
  514. int diff = I444TestFilter_16( \
  515. benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
  516. benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
  517. EXPECT_LE(diff, max_diff); \
  518. } \
  519. TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
  520. int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
  521. Abs(benchmark_height_), kFilter##filter, \
  522. benchmark_iterations_, disable_cpu_flags_, \
  523. benchmark_cpu_info_); \
  524. EXPECT_LE(diff, max_diff); \
  525. } \
  526. TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
  527. int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
  528. Abs(benchmark_height_), kFilter##filter, \
  529. benchmark_iterations_, disable_cpu_flags_, \
  530. benchmark_cpu_info_); \
  531. EXPECT_LE(diff, max_diff); \
  532. } \
  533. TEST_F(LibYUVScaleTest, \
  534. DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \
  535. int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
  536. Abs(benchmark_height_), kFilter##filter, \
  537. benchmark_iterations_, disable_cpu_flags_, \
  538. benchmark_cpu_info_); \
  539. EXPECT_LE(diff, max_diff); \
  540. } \
  541. TEST_F(LibYUVScaleTest, \
  542. DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \
  543. int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
  544. Abs(benchmark_height_), kFilter##filter, \
  545. benchmark_iterations_, disable_cpu_flags_, \
  546. benchmark_cpu_info_); \
  547. EXPECT_LE(diff, max_diff); \
  548. }
  549. #ifdef ENABLE_SLOW_TESTS
  550. // Test scale to a specified size with all 4 filters.
  551. #define TEST_SCALETO(name, width, height) \
  552. TEST_SCALETO1(, name, width, height, None, 0) \
  553. TEST_SCALETO1(, name, width, height, Linear, 3) \
  554. TEST_SCALETO1(, name, width, height, Bilinear, 3) \
  555. TEST_SCALETO1(, name, width, height, Box, 3)
  556. #else
  557. // Test scale to a specified size with all 4 filters.
  558. #define TEST_SCALETO(name, width, height) \
  559. TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
  560. TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
  561. TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
  562. TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
  563. #endif
  564. TEST_SCALETO(Scale, 1, 1)
  565. TEST_SCALETO(Scale, 320, 240)
  566. TEST_SCALETO(Scale, 569, 480)
  567. TEST_SCALETO(Scale, 640, 360)
  568. TEST_SCALETO(Scale, 1280, 720)
  569. #ifdef ENABLE_SLOW_TESTS
  570. TEST_SCALETO(Scale, 1920, 1080)
  571. #endif // ENABLE_SLOW_TESTS
  572. #undef TEST_SCALETO1
  573. #undef TEST_SCALETO
  574. #ifdef ENABLE_ROW_TESTS
  575. #ifdef HAS_SCALEROWDOWN2_SSSE3
  576. TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
  577. SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
  578. SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
  579. SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
  580. memset(orig_pixels, 0, sizeof(orig_pixels));
  581. memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
  582. memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  583. int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  584. if (!has_ssse3) {
  585. printf("Warning SSSE3 not detected; Skipping test.\n");
  586. } else {
  587. // TL.
  588. orig_pixels[0] = 255u;
  589. orig_pixels[1] = 0u;
  590. orig_pixels[128 + 0] = 0u;
  591. orig_pixels[128 + 1] = 0u;
  592. // TR.
  593. orig_pixels[2] = 0u;
  594. orig_pixels[3] = 100u;
  595. orig_pixels[128 + 2] = 0u;
  596. orig_pixels[128 + 3] = 0u;
  597. // BL.
  598. orig_pixels[4] = 0u;
  599. orig_pixels[5] = 0u;
  600. orig_pixels[128 + 4] = 50u;
  601. orig_pixels[128 + 5] = 0u;
  602. // BR.
  603. orig_pixels[6] = 0u;
  604. orig_pixels[7] = 0u;
  605. orig_pixels[128 + 6] = 0u;
  606. orig_pixels[128 + 7] = 20u;
  607. // Odd.
  608. orig_pixels[126] = 4u;
  609. orig_pixels[127] = 255u;
  610. orig_pixels[128 + 126] = 16u;
  611. orig_pixels[128 + 127] = 255u;
  612. // Test regular half size.
  613. ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
  614. EXPECT_EQ(64u, dst_pixels_c[0]);
  615. EXPECT_EQ(25u, dst_pixels_c[1]);
  616. EXPECT_EQ(13u, dst_pixels_c[2]);
  617. EXPECT_EQ(5u, dst_pixels_c[3]);
  618. EXPECT_EQ(0u, dst_pixels_c[4]);
  619. EXPECT_EQ(133u, dst_pixels_c[63]);
  620. // Test Odd width version - Last pixel is just 1 horizontal pixel.
  621. ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
  622. EXPECT_EQ(64u, dst_pixels_c[0]);
  623. EXPECT_EQ(25u, dst_pixels_c[1]);
  624. EXPECT_EQ(13u, dst_pixels_c[2]);
  625. EXPECT_EQ(5u, dst_pixels_c[3]);
  626. EXPECT_EQ(0u, dst_pixels_c[4]);
  627. EXPECT_EQ(10u, dst_pixels_c[63]);
  628. // Test one pixel less, should skip the last pixel.
  629. memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  630. ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
  631. EXPECT_EQ(64u, dst_pixels_c[0]);
  632. EXPECT_EQ(25u, dst_pixels_c[1]);
  633. EXPECT_EQ(13u, dst_pixels_c[2]);
  634. EXPECT_EQ(5u, dst_pixels_c[3]);
  635. EXPECT_EQ(0u, dst_pixels_c[4]);
  636. EXPECT_EQ(0u, dst_pixels_c[63]);
  637. // Test regular half size SSSE3.
  638. ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
  639. EXPECT_EQ(64u, dst_pixels_opt[0]);
  640. EXPECT_EQ(25u, dst_pixels_opt[1]);
  641. EXPECT_EQ(13u, dst_pixels_opt[2]);
  642. EXPECT_EQ(5u, dst_pixels_opt[3]);
  643. EXPECT_EQ(0u, dst_pixels_opt[4]);
  644. EXPECT_EQ(133u, dst_pixels_opt[63]);
  645. // Compare C and SSSE3 match.
  646. ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
  647. ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
  648. for (int i = 0; i < 64; ++i) {
  649. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  650. }
  651. }
  652. }
  653. #endif // HAS_SCALEROWDOWN2_SSSE3
  654. extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
  655. ptrdiff_t src_stride,
  656. uint16_t* dst,
  657. int dst_width);
  658. extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
  659. ptrdiff_t src_stride,
  660. uint16_t* dst,
  661. int dst_width);
  662. extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
  663. ptrdiff_t src_stride,
  664. uint16_t* dst,
  665. int dst_width);
  666. TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
  667. SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun.
  668. SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
  669. SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
  670. memset(orig_pixels, 0, sizeof(orig_pixels));
  671. memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
  672. memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
  673. for (int i = 0; i < 640 * 2 + 1; ++i) {
  674. orig_pixels[i] = i;
  675. }
  676. ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
  677. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  678. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
  679. int has_neon = TestCpuFlag(kCpuHasNEON);
  680. if (has_neon) {
  681. ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  682. } else {
  683. ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  684. }
  685. #elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
  686. int has_mmi = TestCpuFlag(kCpuHasMMI);
  687. if (has_mmi) {
  688. ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  689. } else {
  690. ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  691. }
  692. #else
  693. ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  694. #endif
  695. }
  696. for (int i = 0; i < 1280; ++i) {
  697. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  698. }
  699. EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
  700. EXPECT_EQ(dst_pixels_c[1279], 800);
  701. }
  702. extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
  703. ptrdiff_t src_stride,
  704. uint16_t* dst,
  705. int dst_width);
  706. TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
  707. SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
  708. SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
  709. SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
  710. memset(orig_pixels, 0, sizeof(orig_pixels));
  711. memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  712. memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
  713. for (int i = 0; i < 2560 * 2; ++i) {
  714. orig_pixels[i] = i;
  715. }
  716. ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
  717. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  718. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
  719. int has_neon = TestCpuFlag(kCpuHasNEON);
  720. if (has_neon) {
  721. ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
  722. } else {
  723. ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
  724. }
  725. #else
  726. ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
  727. #endif
  728. }
  729. for (int i = 0; i < 1280; ++i) {
  730. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  731. }
  732. EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
  733. EXPECT_EQ(dst_pixels_c[1279], 3839);
  734. }
  735. #endif // ENABLE_ROW_TESTS
  736. // Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
  737. // difference.
  738. // 0 = exact.
  739. static int TestPlaneFilter_16(int src_width,
  740. int src_height,
  741. int dst_width,
  742. int dst_height,
  743. FilterMode f,
  744. int benchmark_iterations,
  745. int disable_cpu_flags,
  746. int benchmark_cpu_info) {
  747. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  748. return 0;
  749. }
  750. int i;
  751. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  752. int src_stride_y = Abs(src_width);
  753. int dst_y_plane_size = dst_width * dst_height;
  754. int dst_stride_y = dst_width;
  755. align_buffer_page_end(src_y, src_y_plane_size);
  756. align_buffer_page_end(src_y_16, src_y_plane_size * 2);
  757. align_buffer_page_end(dst_y_8, dst_y_plane_size);
  758. align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
  759. uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
  760. uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
  761. MemRandomize(src_y, src_y_plane_size);
  762. memset(dst_y_8, 0, dst_y_plane_size);
  763. memset(dst_y_16, 1, dst_y_plane_size * 2);
  764. for (i = 0; i < src_y_plane_size; ++i) {
  765. p_src_y_16[i] = src_y[i] & 255;
  766. }
  767. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  768. ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
  769. dst_width, dst_height, f);
  770. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  771. for (i = 0; i < benchmark_iterations; ++i) {
  772. ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
  773. dst_stride_y, dst_width, dst_height, f);
  774. }
  775. // Expect an exact match.
  776. int max_diff = 0;
  777. for (i = 0; i < dst_y_plane_size; ++i) {
  778. int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
  779. if (abs_diff > max_diff) {
  780. max_diff = abs_diff;
  781. }
  782. }
  783. free_aligned_buffer_page_end(dst_y_8);
  784. free_aligned_buffer_page_end(dst_y_16);
  785. free_aligned_buffer_page_end(src_y);
  786. free_aligned_buffer_page_end(src_y_16);
  787. return max_diff;
  788. }
  // The following adjustments in dimensions ensure the scale factor will be
  // exactly achieved.
  // 2 is chroma subsample.
  // SX yields a source dimension that is a multiple of 2 * denom and DX yields
  // the matching destination dimension, a multiple of 2 * nom, so SX / DX is
  // exactly denom / nom. DX accepts denom only to keep the two signatures
  // parallel; it does not use it.
  // Every parameter is parenthesized so expression arguments expand correctly.
  #define DX(x, nom, denom) \
    static_cast<int>((((Abs(x) / (nom)) + 1) / 2) * (nom) * 2)
  #define SX(x, nom, denom) \
    static_cast<int>(((((x) / (nom)) + 1) / 2) * (denom) * 2)
  // Defines one disabled test for a scale factor and filter pair. Source
  // dimensions come from SX and destination dimensions from DX; the difference
  // returned by TestPlaneFilter_16 must not exceed the given tolerance.
  #define TEST_FACTOR1(name, mode, nom, denom, tolerance)                      \
    TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##mode##_16) { \
      const int src_w = SX(benchmark_width_, nom, denom);                      \
      const int src_h = SX(benchmark_height_, nom, denom);                     \
      const int dst_w = DX(benchmark_width_, nom, denom);                      \
      const int dst_h = DX(benchmark_height_, nom, denom);                     \
      int diff = TestPlaneFilter_16(src_w, src_h, dst_w, dst_h, kFilter##mode, \
                                    benchmark_iterations_, disable_cpu_flags_, \
                                    benchmark_cpu_info_);                      \
      EXPECT_LE(diff, tolerance);                                              \
    }
  803. // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
  804. // filtering is different fixed point implementations for SSSE3, Neon and C.
  805. #define TEST_FACTOR(name, nom, denom, boxdiff) \
  806. TEST_FACTOR1(name, None, nom, denom, 0) \
  807. TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
  808. TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
  809. TEST_FACTOR1(name, Box, nom, denom, boxdiff)
  810. TEST_FACTOR(2, 1, 2, 0)
  811. TEST_FACTOR(4, 1, 4, 0)
  812. // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
  813. TEST_FACTOR(3by4, 3, 4, 1)
  814. TEST_FACTOR(3by8, 3, 8, 1)
  815. TEST_FACTOR(3, 1, 3, 0)
  816. #undef TEST_FACTOR1
  817. #undef TEST_FACTOR
  818. #undef SX
  819. #undef DX
  820. } // namespace libyuv