planar_test.cc 126 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565
  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include <stdlib.h>
  12. #include <time.h>
  13. #include "../unit_test/unit_test.h"
  14. #include "libyuv/compare.h"
  15. #include "libyuv/convert.h"
  16. #include "libyuv/convert_argb.h"
  17. #include "libyuv/convert_from.h"
  18. #include "libyuv/convert_from_argb.h"
  19. #include "libyuv/cpu_id.h"
  20. #include "libyuv/planar_functions.h"
  21. #include "libyuv/rotate.h"
  22. #include "libyuv/scale.h"
  23. #ifdef ENABLE_ROW_TESTS
  24. // row.h defines SIMD_ALIGNED, overriding unit_test.h
  25. // TODO(fbarchard): Remove row.h from unittests. Test public functions.
  26. #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
  27. #endif
  28. namespace libyuv {
  29. TEST_F(LibYUVPlanarTest, TestAttenuate) {
  30. const int kSize = 1280 * 4;
  31. align_buffer_page_end(orig_pixels, kSize);
  32. align_buffer_page_end(atten_pixels, kSize);
  33. align_buffer_page_end(unatten_pixels, kSize);
  34. align_buffer_page_end(atten2_pixels, kSize);
  35. // Test unattenuation clamps
  36. orig_pixels[0 * 4 + 0] = 200u;
  37. orig_pixels[0 * 4 + 1] = 129u;
  38. orig_pixels[0 * 4 + 2] = 127u;
  39. orig_pixels[0 * 4 + 3] = 128u;
  40. // Test unattenuation transparent and opaque are unaffected
  41. orig_pixels[1 * 4 + 0] = 16u;
  42. orig_pixels[1 * 4 + 1] = 64u;
  43. orig_pixels[1 * 4 + 2] = 192u;
  44. orig_pixels[1 * 4 + 3] = 0u;
  45. orig_pixels[2 * 4 + 0] = 16u;
  46. orig_pixels[2 * 4 + 1] = 64u;
  47. orig_pixels[2 * 4 + 2] = 192u;
  48. orig_pixels[2 * 4 + 3] = 255u;
  49. orig_pixels[3 * 4 + 0] = 16u;
  50. orig_pixels[3 * 4 + 1] = 64u;
  51. orig_pixels[3 * 4 + 2] = 192u;
  52. orig_pixels[3 * 4 + 3] = 128u;
  53. ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
  54. EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
  55. EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
  56. EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
  57. EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]);
  58. EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]);
  59. EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
  60. EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
  61. EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
  62. EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]);
  63. EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]);
  64. EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]);
  65. EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]);
  66. EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]);
  67. EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]);
  68. EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]);
  69. EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]);
  70. for (int i = 0; i < 1280; ++i) {
  71. orig_pixels[i * 4 + 0] = i;
  72. orig_pixels[i * 4 + 1] = i / 2;
  73. orig_pixels[i * 4 + 2] = i / 3;
  74. orig_pixels[i * 4 + 3] = i;
  75. }
  76. ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1);
  77. ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1);
  78. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  79. ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
  80. }
  81. for (int i = 0; i < 1280; ++i) {
  82. EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2);
  83. EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2);
  84. EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2);
  85. EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2);
  86. }
  87. // Make sure transparent, 50% and opaque are fully accurate.
  88. EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
  89. EXPECT_EQ(0, atten_pixels[0 * 4 + 1]);
  90. EXPECT_EQ(0, atten_pixels[0 * 4 + 2]);
  91. EXPECT_EQ(0, atten_pixels[0 * 4 + 3]);
  92. EXPECT_EQ(64, atten_pixels[128 * 4 + 0]);
  93. EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
  94. EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
  95. EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
  96. EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
  97. EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
  98. EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
  99. EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
  100. free_aligned_buffer_page_end(atten2_pixels);
  101. free_aligned_buffer_page_end(unatten_pixels);
  102. free_aligned_buffer_page_end(atten_pixels);
  103. free_aligned_buffer_page_end(orig_pixels);
  104. }
  105. static int TestAttenuateI(int width,
  106. int height,
  107. int benchmark_iterations,
  108. int disable_cpu_flags,
  109. int benchmark_cpu_info,
  110. int invert,
  111. int off) {
  112. if (width < 1) {
  113. width = 1;
  114. }
  115. const int kBpp = 4;
  116. const int kStride = width * kBpp;
  117. align_buffer_page_end(src_argb, kStride * height + off);
  118. align_buffer_page_end(dst_argb_c, kStride * height);
  119. align_buffer_page_end(dst_argb_opt, kStride * height);
  120. for (int i = 0; i < kStride * height; ++i) {
  121. src_argb[i + off] = (fastrand() & 0xff);
  122. }
  123. memset(dst_argb_c, 0, kStride * height);
  124. memset(dst_argb_opt, 0, kStride * height);
  125. MaskCpuFlags(disable_cpu_flags);
  126. ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
  127. invert * height);
  128. MaskCpuFlags(benchmark_cpu_info);
  129. for (int i = 0; i < benchmark_iterations; ++i) {
  130. ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
  131. invert * height);
  132. }
  133. int max_diff = 0;
  134. for (int i = 0; i < kStride * height; ++i) {
  135. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  136. static_cast<int>(dst_argb_opt[i]));
  137. if (abs_diff > max_diff) {
  138. max_diff = abs_diff;
  139. }
  140. }
  141. free_aligned_buffer_page_end(src_argb);
  142. free_aligned_buffer_page_end(dst_argb_c);
  143. free_aligned_buffer_page_end(dst_argb_opt);
  144. return max_diff;
  145. }
  146. TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  147. int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_,
  148. benchmark_iterations_, disable_cpu_flags_,
  149. benchmark_cpu_info_, +1, 0);
  150. EXPECT_LE(max_diff, 2);
  151. }
  152. TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  153. int max_diff =
  154. TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
  155. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  156. EXPECT_LE(max_diff, 2);
  157. }
  158. TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  159. int max_diff =
  160. TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
  161. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  162. EXPECT_LE(max_diff, 2);
  163. }
  164. TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  165. int max_diff =
  166. TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
  167. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  168. EXPECT_LE(max_diff, 2);
  169. }
  170. static int TestUnattenuateI(int width,
  171. int height,
  172. int benchmark_iterations,
  173. int disable_cpu_flags,
  174. int benchmark_cpu_info,
  175. int invert,
  176. int off) {
  177. if (width < 1) {
  178. width = 1;
  179. }
  180. const int kBpp = 4;
  181. const int kStride = width * kBpp;
  182. align_buffer_page_end(src_argb, kStride * height + off);
  183. align_buffer_page_end(dst_argb_c, kStride * height);
  184. align_buffer_page_end(dst_argb_opt, kStride * height);
  185. for (int i = 0; i < kStride * height; ++i) {
  186. src_argb[i + off] = (fastrand() & 0xff);
  187. }
  188. ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
  189. height);
  190. memset(dst_argb_c, 0, kStride * height);
  191. memset(dst_argb_opt, 0, kStride * height);
  192. MaskCpuFlags(disable_cpu_flags);
  193. ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
  194. invert * height);
  195. MaskCpuFlags(benchmark_cpu_info);
  196. for (int i = 0; i < benchmark_iterations; ++i) {
  197. ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
  198. invert * height);
  199. }
  200. int max_diff = 0;
  201. for (int i = 0; i < kStride * height; ++i) {
  202. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  203. static_cast<int>(dst_argb_opt[i]));
  204. if (abs_diff > max_diff) {
  205. max_diff = abs_diff;
  206. }
  207. }
  208. free_aligned_buffer_page_end(src_argb);
  209. free_aligned_buffer_page_end(dst_argb_c);
  210. free_aligned_buffer_page_end(dst_argb_opt);
  211. return max_diff;
  212. }
  213. TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  214. int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_,
  215. benchmark_iterations_, disable_cpu_flags_,
  216. benchmark_cpu_info_, +1, 0);
  217. EXPECT_LE(max_diff, 2);
  218. }
  219. TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  220. int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
  221. benchmark_iterations_, disable_cpu_flags_,
  222. benchmark_cpu_info_, +1, 1);
  223. EXPECT_LE(max_diff, 2);
  224. }
  225. TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  226. int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
  227. benchmark_iterations_, disable_cpu_flags_,
  228. benchmark_cpu_info_, -1, 0);
  229. EXPECT_LE(max_diff, 2);
  230. }
  231. TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  232. int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
  233. benchmark_iterations_, disable_cpu_flags_,
  234. benchmark_cpu_info_, +1, 0);
  235. EXPECT_LE(max_diff, 2);
  236. }
  237. TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  238. SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]);
  239. SIMD_ALIGNED(int32_t added_pixels[16][16][4]);
  240. for (int y = 0; y < 16; ++y) {
  241. for (int x = 0; x < 16; ++x) {
  242. orig_pixels[y][x][0] = 1u;
  243. orig_pixels[y][x][1] = 2u;
  244. orig_pixels[y][x][2] = 3u;
  245. orig_pixels[y][x][3] = 255u;
  246. }
  247. }
  248. ARGBComputeCumulativeSum(&orig_pixels[0][0][0], 16 * 4,
  249. &added_pixels[0][0][0], 16 * 4, 16, 16);
  250. for (int y = 0; y < 16; ++y) {
  251. for (int x = 0; x < 16; ++x) {
  252. EXPECT_EQ((x + 1) * (y + 1), added_pixels[y][x][0]);
  253. EXPECT_EQ((x + 1) * (y + 1) * 2, added_pixels[y][x][1]);
  254. EXPECT_EQ((x + 1) * (y + 1) * 3, added_pixels[y][x][2]);
  255. EXPECT_EQ((x + 1) * (y + 1) * 255, added_pixels[y][x][3]);
  256. }
  257. }
  258. }
  259. // near is for legacy platforms.
  260. TEST_F(LibYUVPlanarTest, TestARGBGray) {
  261. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  262. memset(orig_pixels, 0, sizeof(orig_pixels));
  263. // Test blue
  264. orig_pixels[0][0] = 255u;
  265. orig_pixels[0][1] = 0u;
  266. orig_pixels[0][2] = 0u;
  267. orig_pixels[0][3] = 128u;
  268. // Test green
  269. orig_pixels[1][0] = 0u;
  270. orig_pixels[1][1] = 255u;
  271. orig_pixels[1][2] = 0u;
  272. orig_pixels[1][3] = 0u;
  273. // Test red
  274. orig_pixels[2][0] = 0u;
  275. orig_pixels[2][1] = 0u;
  276. orig_pixels[2][2] = 255u;
  277. orig_pixels[2][3] = 255u;
  278. // Test black
  279. orig_pixels[3][0] = 0u;
  280. orig_pixels[3][1] = 0u;
  281. orig_pixels[3][2] = 0u;
  282. orig_pixels[3][3] = 255u;
  283. // Test white
  284. orig_pixels[4][0] = 255u;
  285. orig_pixels[4][1] = 255u;
  286. orig_pixels[4][2] = 255u;
  287. orig_pixels[4][3] = 255u;
  288. // Test color
  289. orig_pixels[5][0] = 16u;
  290. orig_pixels[5][1] = 64u;
  291. orig_pixels[5][2] = 192u;
  292. orig_pixels[5][3] = 224u;
  293. // Do 16 to test asm version.
  294. ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  295. EXPECT_NEAR(29u, orig_pixels[0][0], 1);
  296. EXPECT_NEAR(29u, orig_pixels[0][1], 1);
  297. EXPECT_NEAR(29u, orig_pixels[0][2], 1);
  298. EXPECT_EQ(128u, orig_pixels[0][3]);
  299. EXPECT_EQ(149u, orig_pixels[1][0]);
  300. EXPECT_EQ(149u, orig_pixels[1][1]);
  301. EXPECT_EQ(149u, orig_pixels[1][2]);
  302. EXPECT_EQ(0u, orig_pixels[1][3]);
  303. EXPECT_NEAR(77u, orig_pixels[2][0], 1);
  304. EXPECT_NEAR(77u, orig_pixels[2][1], 1);
  305. EXPECT_NEAR(77u, orig_pixels[2][2], 1);
  306. EXPECT_EQ(255u, orig_pixels[2][3]);
  307. EXPECT_EQ(0u, orig_pixels[3][0]);
  308. EXPECT_EQ(0u, orig_pixels[3][1]);
  309. EXPECT_EQ(0u, orig_pixels[3][2]);
  310. EXPECT_EQ(255u, orig_pixels[3][3]);
  311. EXPECT_EQ(255u, orig_pixels[4][0]);
  312. EXPECT_EQ(255u, orig_pixels[4][1]);
  313. EXPECT_EQ(255u, orig_pixels[4][2]);
  314. EXPECT_EQ(255u, orig_pixels[4][3]);
  315. EXPECT_NEAR(97u, orig_pixels[5][0], 1);
  316. EXPECT_NEAR(97u, orig_pixels[5][1], 1);
  317. EXPECT_NEAR(97u, orig_pixels[5][2], 1);
  318. EXPECT_EQ(224u, orig_pixels[5][3]);
  319. for (int i = 0; i < 1280; ++i) {
  320. orig_pixels[i][0] = i;
  321. orig_pixels[i][1] = i / 2;
  322. orig_pixels[i][2] = i / 3;
  323. orig_pixels[i][3] = i;
  324. }
  325. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  326. ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  327. }
  328. }
  329. TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
  330. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  331. SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
  332. memset(orig_pixels, 0, sizeof(orig_pixels));
  333. // Test blue
  334. orig_pixels[0][0] = 255u;
  335. orig_pixels[0][1] = 0u;
  336. orig_pixels[0][2] = 0u;
  337. orig_pixels[0][3] = 128u;
  338. // Test green
  339. orig_pixels[1][0] = 0u;
  340. orig_pixels[1][1] = 255u;
  341. orig_pixels[1][2] = 0u;
  342. orig_pixels[1][3] = 0u;
  343. // Test red
  344. orig_pixels[2][0] = 0u;
  345. orig_pixels[2][1] = 0u;
  346. orig_pixels[2][2] = 255u;
  347. orig_pixels[2][3] = 255u;
  348. // Test black
  349. orig_pixels[3][0] = 0u;
  350. orig_pixels[3][1] = 0u;
  351. orig_pixels[3][2] = 0u;
  352. orig_pixels[3][3] = 255u;
  353. // Test white
  354. orig_pixels[4][0] = 255u;
  355. orig_pixels[4][1] = 255u;
  356. orig_pixels[4][2] = 255u;
  357. orig_pixels[4][3] = 255u;
  358. // Test color
  359. orig_pixels[5][0] = 16u;
  360. orig_pixels[5][1] = 64u;
  361. orig_pixels[5][2] = 192u;
  362. orig_pixels[5][3] = 224u;
  363. // Do 16 to test asm version.
  364. ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
  365. EXPECT_NEAR(30u, gray_pixels[0][0], 1);
  366. EXPECT_NEAR(30u, gray_pixels[0][1], 1);
  367. EXPECT_NEAR(30u, gray_pixels[0][2], 1);
  368. EXPECT_NEAR(128u, gray_pixels[0][3], 1);
  369. EXPECT_NEAR(149u, gray_pixels[1][0], 1);
  370. EXPECT_NEAR(149u, gray_pixels[1][1], 1);
  371. EXPECT_NEAR(149u, gray_pixels[1][2], 1);
  372. EXPECT_NEAR(0u, gray_pixels[1][3], 1);
  373. EXPECT_NEAR(76u, gray_pixels[2][0], 1);
  374. EXPECT_NEAR(76u, gray_pixels[2][1], 1);
  375. EXPECT_NEAR(76u, gray_pixels[2][2], 1);
  376. EXPECT_NEAR(255u, gray_pixels[2][3], 1);
  377. EXPECT_NEAR(0u, gray_pixels[3][0], 1);
  378. EXPECT_NEAR(0u, gray_pixels[3][1], 1);
  379. EXPECT_NEAR(0u, gray_pixels[3][2], 1);
  380. EXPECT_NEAR(255u, gray_pixels[3][3], 1);
  381. EXPECT_NEAR(255u, gray_pixels[4][0], 1);
  382. EXPECT_NEAR(255u, gray_pixels[4][1], 1);
  383. EXPECT_NEAR(255u, gray_pixels[4][2], 1);
  384. EXPECT_NEAR(255u, gray_pixels[4][3], 1);
  385. EXPECT_NEAR(96u, gray_pixels[5][0], 1);
  386. EXPECT_NEAR(96u, gray_pixels[5][1], 1);
  387. EXPECT_NEAR(96u, gray_pixels[5][2], 1);
  388. EXPECT_NEAR(224u, gray_pixels[5][3], 1);
  389. for (int i = 0; i < 1280; ++i) {
  390. orig_pixels[i][0] = i;
  391. orig_pixels[i][1] = i / 2;
  392. orig_pixels[i][2] = i / 3;
  393. orig_pixels[i][3] = i;
  394. }
  395. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  396. ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
  397. }
  398. for (int i = 0; i < 256; ++i) {
  399. orig_pixels[i][0] = i;
  400. orig_pixels[i][1] = i;
  401. orig_pixels[i][2] = i;
  402. orig_pixels[i][3] = i;
  403. }
  404. ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1);
  405. for (int i = 0; i < 256; ++i) {
  406. EXPECT_EQ(i, orig_pixels[i][0]);
  407. EXPECT_EQ(i, orig_pixels[i][1]);
  408. EXPECT_EQ(i, orig_pixels[i][2]);
  409. EXPECT_EQ(i, orig_pixels[i][3]);
  410. }
  411. }
  412. TEST_F(LibYUVPlanarTest, TestARGBSepia) {
  413. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  414. memset(orig_pixels, 0, sizeof(orig_pixels));
  415. // Test blue
  416. orig_pixels[0][0] = 255u;
  417. orig_pixels[0][1] = 0u;
  418. orig_pixels[0][2] = 0u;
  419. orig_pixels[0][3] = 128u;
  420. // Test green
  421. orig_pixels[1][0] = 0u;
  422. orig_pixels[1][1] = 255u;
  423. orig_pixels[1][2] = 0u;
  424. orig_pixels[1][3] = 0u;
  425. // Test red
  426. orig_pixels[2][0] = 0u;
  427. orig_pixels[2][1] = 0u;
  428. orig_pixels[2][2] = 255u;
  429. orig_pixels[2][3] = 255u;
  430. // Test black
  431. orig_pixels[3][0] = 0u;
  432. orig_pixels[3][1] = 0u;
  433. orig_pixels[3][2] = 0u;
  434. orig_pixels[3][3] = 255u;
  435. // Test white
  436. orig_pixels[4][0] = 255u;
  437. orig_pixels[4][1] = 255u;
  438. orig_pixels[4][2] = 255u;
  439. orig_pixels[4][3] = 255u;
  440. // Test color
  441. orig_pixels[5][0] = 16u;
  442. orig_pixels[5][1] = 64u;
  443. orig_pixels[5][2] = 192u;
  444. orig_pixels[5][3] = 224u;
  445. // Do 16 to test asm version.
  446. ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  447. EXPECT_EQ(33u, orig_pixels[0][0]);
  448. EXPECT_EQ(43u, orig_pixels[0][1]);
  449. EXPECT_EQ(47u, orig_pixels[0][2]);
  450. EXPECT_EQ(128u, orig_pixels[0][3]);
  451. EXPECT_EQ(135u, orig_pixels[1][0]);
  452. EXPECT_EQ(175u, orig_pixels[1][1]);
  453. EXPECT_EQ(195u, orig_pixels[1][2]);
  454. EXPECT_EQ(0u, orig_pixels[1][3]);
  455. EXPECT_EQ(69u, orig_pixels[2][0]);
  456. EXPECT_EQ(89u, orig_pixels[2][1]);
  457. EXPECT_EQ(99u, orig_pixels[2][2]);
  458. EXPECT_EQ(255u, orig_pixels[2][3]);
  459. EXPECT_EQ(0u, orig_pixels[3][0]);
  460. EXPECT_EQ(0u, orig_pixels[3][1]);
  461. EXPECT_EQ(0u, orig_pixels[3][2]);
  462. EXPECT_EQ(255u, orig_pixels[3][3]);
  463. EXPECT_EQ(239u, orig_pixels[4][0]);
  464. EXPECT_EQ(255u, orig_pixels[4][1]);
  465. EXPECT_EQ(255u, orig_pixels[4][2]);
  466. EXPECT_EQ(255u, orig_pixels[4][3]);
  467. EXPECT_EQ(88u, orig_pixels[5][0]);
  468. EXPECT_EQ(114u, orig_pixels[5][1]);
  469. EXPECT_EQ(127u, orig_pixels[5][2]);
  470. EXPECT_EQ(224u, orig_pixels[5][3]);
  471. for (int i = 0; i < 1280; ++i) {
  472. orig_pixels[i][0] = i;
  473. orig_pixels[i][1] = i / 2;
  474. orig_pixels[i][2] = i / 3;
  475. orig_pixels[i][3] = i;
  476. }
  477. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  478. ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  479. }
  480. }
  481. TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
  482. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  483. SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  484. SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  485. // Matrix for Sepia.
  486. SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
  487. 17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
  488. 24 / 2, 98 / 2, 50 / 2, 0, 0, 0, 0, 64, // Copy alpha.
  489. };
  490. memset(orig_pixels, 0, sizeof(orig_pixels));
  491. // Test blue
  492. orig_pixels[0][0] = 255u;
  493. orig_pixels[0][1] = 0u;
  494. orig_pixels[0][2] = 0u;
  495. orig_pixels[0][3] = 128u;
  496. // Test green
  497. orig_pixels[1][0] = 0u;
  498. orig_pixels[1][1] = 255u;
  499. orig_pixels[1][2] = 0u;
  500. orig_pixels[1][3] = 0u;
  501. // Test red
  502. orig_pixels[2][0] = 0u;
  503. orig_pixels[2][1] = 0u;
  504. orig_pixels[2][2] = 255u;
  505. orig_pixels[2][3] = 255u;
  506. // Test color
  507. orig_pixels[3][0] = 16u;
  508. orig_pixels[3][1] = 64u;
  509. orig_pixels[3][2] = 192u;
  510. orig_pixels[3][3] = 224u;
  511. // Do 16 to test asm version.
  512. ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  513. &kRGBToSepia[0], 16, 1);
  514. EXPECT_EQ(31u, dst_pixels_opt[0][0]);
  515. EXPECT_EQ(43u, dst_pixels_opt[0][1]);
  516. EXPECT_EQ(47u, dst_pixels_opt[0][2]);
  517. EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  518. EXPECT_EQ(135u, dst_pixels_opt[1][0]);
  519. EXPECT_EQ(175u, dst_pixels_opt[1][1]);
  520. EXPECT_EQ(195u, dst_pixels_opt[1][2]);
  521. EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  522. EXPECT_EQ(67u, dst_pixels_opt[2][0]);
  523. EXPECT_EQ(87u, dst_pixels_opt[2][1]);
  524. EXPECT_EQ(99u, dst_pixels_opt[2][2]);
  525. EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  526. EXPECT_EQ(87u, dst_pixels_opt[3][0]);
  527. EXPECT_EQ(112u, dst_pixels_opt[3][1]);
  528. EXPECT_EQ(127u, dst_pixels_opt[3][2]);
  529. EXPECT_EQ(224u, dst_pixels_opt[3][3]);
  530. for (int i = 0; i < 1280; ++i) {
  531. orig_pixels[i][0] = i;
  532. orig_pixels[i][1] = i / 2;
  533. orig_pixels[i][2] = i / 3;
  534. orig_pixels[i][3] = i;
  535. }
  536. MaskCpuFlags(disable_cpu_flags_);
  537. ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
  538. &kRGBToSepia[0], 1280, 1);
  539. MaskCpuFlags(benchmark_cpu_info_);
  540. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  541. ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  542. &kRGBToSepia[0], 1280, 1);
  543. }
  544. for (int i = 0; i < 1280; ++i) {
  545. EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
  546. EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
  547. EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
  548. EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  549. }
  550. }
  551. TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  552. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  553. // Matrix for Sepia.
  554. SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
  555. 17, 68, 35, 0, 22, 88, 45, 0,
  556. 24, 98, 50, 0, 0, 0, 0, 0, // Unused but makes matrix 16 bytes.
  557. };
  558. memset(orig_pixels, 0, sizeof(orig_pixels));
  559. // Test blue
  560. orig_pixels[0][0] = 255u;
  561. orig_pixels[0][1] = 0u;
  562. orig_pixels[0][2] = 0u;
  563. orig_pixels[0][3] = 128u;
  564. // Test green
  565. orig_pixels[1][0] = 0u;
  566. orig_pixels[1][1] = 255u;
  567. orig_pixels[1][2] = 0u;
  568. orig_pixels[1][3] = 0u;
  569. // Test red
  570. orig_pixels[2][0] = 0u;
  571. orig_pixels[2][1] = 0u;
  572. orig_pixels[2][2] = 255u;
  573. orig_pixels[2][3] = 255u;
  574. // Test color
  575. orig_pixels[3][0] = 16u;
  576. orig_pixels[3][1] = 64u;
  577. orig_pixels[3][2] = 192u;
  578. orig_pixels[3][3] = 224u;
  579. // Do 16 to test asm version.
  580. RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  581. EXPECT_EQ(31u, orig_pixels[0][0]);
  582. EXPECT_EQ(43u, orig_pixels[0][1]);
  583. EXPECT_EQ(47u, orig_pixels[0][2]);
  584. EXPECT_EQ(128u, orig_pixels[0][3]);
  585. EXPECT_EQ(135u, orig_pixels[1][0]);
  586. EXPECT_EQ(175u, orig_pixels[1][1]);
  587. EXPECT_EQ(195u, orig_pixels[1][2]);
  588. EXPECT_EQ(0u, orig_pixels[1][3]);
  589. EXPECT_EQ(67u, orig_pixels[2][0]);
  590. EXPECT_EQ(87u, orig_pixels[2][1]);
  591. EXPECT_EQ(99u, orig_pixels[2][2]);
  592. EXPECT_EQ(255u, orig_pixels[2][3]);
  593. EXPECT_EQ(87u, orig_pixels[3][0]);
  594. EXPECT_EQ(112u, orig_pixels[3][1]);
  595. EXPECT_EQ(127u, orig_pixels[3][2]);
  596. EXPECT_EQ(224u, orig_pixels[3][3]);
  597. for (int i = 0; i < 1280; ++i) {
  598. orig_pixels[i][0] = i;
  599. orig_pixels[i][1] = i / 2;
  600. orig_pixels[i][2] = i / 3;
  601. orig_pixels[i][3] = i;
  602. }
  603. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  604. RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1);
  605. }
  606. }
  607. TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  608. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  609. memset(orig_pixels, 0, sizeof(orig_pixels));
  610. // Matrix for Sepia.
  611. static const uint8_t kARGBTable[256 * 4] = {
  612. 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  613. };
  614. orig_pixels[0][0] = 0u;
  615. orig_pixels[0][1] = 0u;
  616. orig_pixels[0][2] = 0u;
  617. orig_pixels[0][3] = 0u;
  618. orig_pixels[1][0] = 1u;
  619. orig_pixels[1][1] = 1u;
  620. orig_pixels[1][2] = 1u;
  621. orig_pixels[1][3] = 1u;
  622. orig_pixels[2][0] = 2u;
  623. orig_pixels[2][1] = 2u;
  624. orig_pixels[2][2] = 2u;
  625. orig_pixels[2][3] = 2u;
  626. orig_pixels[3][0] = 0u;
  627. orig_pixels[3][1] = 1u;
  628. orig_pixels[3][2] = 2u;
  629. orig_pixels[3][3] = 3u;
  630. // Do 16 to test asm version.
  631. ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  632. EXPECT_EQ(1u, orig_pixels[0][0]);
  633. EXPECT_EQ(2u, orig_pixels[0][1]);
  634. EXPECT_EQ(3u, orig_pixels[0][2]);
  635. EXPECT_EQ(4u, orig_pixels[0][3]);
  636. EXPECT_EQ(5u, orig_pixels[1][0]);
  637. EXPECT_EQ(6u, orig_pixels[1][1]);
  638. EXPECT_EQ(7u, orig_pixels[1][2]);
  639. EXPECT_EQ(8u, orig_pixels[1][3]);
  640. EXPECT_EQ(9u, orig_pixels[2][0]);
  641. EXPECT_EQ(10u, orig_pixels[2][1]);
  642. EXPECT_EQ(11u, orig_pixels[2][2]);
  643. EXPECT_EQ(12u, orig_pixels[2][3]);
  644. EXPECT_EQ(1u, orig_pixels[3][0]);
  645. EXPECT_EQ(6u, orig_pixels[3][1]);
  646. EXPECT_EQ(11u, orig_pixels[3][2]);
  647. EXPECT_EQ(16u, orig_pixels[3][3]);
  648. for (int i = 0; i < 1280; ++i) {
  649. orig_pixels[i][0] = i;
  650. orig_pixels[i][1] = i / 2;
  651. orig_pixels[i][2] = i / 3;
  652. orig_pixels[i][3] = i;
  653. }
  654. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  655. ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
  656. }
  657. }
  658. // Same as TestARGBColorTable except alpha does not change.
  659. TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
  660. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  661. memset(orig_pixels, 0, sizeof(orig_pixels));
  662. // Matrix for Sepia.
  663. static const uint8_t kARGBTable[256 * 4] = {
  664. 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  665. };
  666. orig_pixels[0][0] = 0u;
  667. orig_pixels[0][1] = 0u;
  668. orig_pixels[0][2] = 0u;
  669. orig_pixels[0][3] = 0u;
  670. orig_pixels[1][0] = 1u;
  671. orig_pixels[1][1] = 1u;
  672. orig_pixels[1][2] = 1u;
  673. orig_pixels[1][3] = 1u;
  674. orig_pixels[2][0] = 2u;
  675. orig_pixels[2][1] = 2u;
  676. orig_pixels[2][2] = 2u;
  677. orig_pixels[2][3] = 2u;
  678. orig_pixels[3][0] = 0u;
  679. orig_pixels[3][1] = 1u;
  680. orig_pixels[3][2] = 2u;
  681. orig_pixels[3][3] = 3u;
  682. // Do 16 to test asm version.
  683. RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  684. EXPECT_EQ(1u, orig_pixels[0][0]);
  685. EXPECT_EQ(2u, orig_pixels[0][1]);
  686. EXPECT_EQ(3u, orig_pixels[0][2]);
  687. EXPECT_EQ(0u, orig_pixels[0][3]); // Alpha unchanged.
  688. EXPECT_EQ(5u, orig_pixels[1][0]);
  689. EXPECT_EQ(6u, orig_pixels[1][1]);
  690. EXPECT_EQ(7u, orig_pixels[1][2]);
  691. EXPECT_EQ(1u, orig_pixels[1][3]); // Alpha unchanged.
  692. EXPECT_EQ(9u, orig_pixels[2][0]);
  693. EXPECT_EQ(10u, orig_pixels[2][1]);
  694. EXPECT_EQ(11u, orig_pixels[2][2]);
  695. EXPECT_EQ(2u, orig_pixels[2][3]); // Alpha unchanged.
  696. EXPECT_EQ(1u, orig_pixels[3][0]);
  697. EXPECT_EQ(6u, orig_pixels[3][1]);
  698. EXPECT_EQ(11u, orig_pixels[3][2]);
  699. EXPECT_EQ(3u, orig_pixels[3][3]); // Alpha unchanged.
  700. for (int i = 0; i < 1280; ++i) {
  701. orig_pixels[i][0] = i;
  702. orig_pixels[i][1] = i / 2;
  703. orig_pixels[i][2] = i / 3;
  704. orig_pixels[i][3] = i;
  705. }
  706. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  707. RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
  708. }
  709. }
  710. TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  711. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  712. for (int i = 0; i < 1280; ++i) {
  713. orig_pixels[i][0] = i;
  714. orig_pixels[i][1] = i / 2;
  715. orig_pixels[i][2] = i / 3;
  716. orig_pixels[i][3] = i;
  717. }
  718. ARGBQuantize(&orig_pixels[0][0], 0, (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0,
  719. 1280, 1);
  720. for (int i = 0; i < 1280; ++i) {
  721. EXPECT_EQ((i / 8 * 8 + 8 / 2) & 255, orig_pixels[i][0]);
  722. EXPECT_EQ((i / 2 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][1]);
  723. EXPECT_EQ((i / 3 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][2]);
  724. EXPECT_EQ(i & 255, orig_pixels[i][3]);
  725. }
  726. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  727. ARGBQuantize(&orig_pixels[0][0], 0, (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0,
  728. 1280, 1);
  729. }
  730. }
  731. TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) {
  732. align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 4);
  733. align_buffer_page_end(dst_pixels_opt,
  734. benchmark_width_ * benchmark_height_ * 4);
  735. align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 4);
  736. MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 4);
  737. MaskCpuFlags(disable_cpu_flags_);
  738. ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_c,
  739. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  740. MaskCpuFlags(benchmark_cpu_info_);
  741. for (int i = 0; i < benchmark_iterations_; ++i) {
  742. ARGBMirror(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
  743. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  744. }
  745. for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) {
  746. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  747. }
  748. free_aligned_buffer_page_end(src_pixels);
  749. free_aligned_buffer_page_end(dst_pixels_opt);
  750. free_aligned_buffer_page_end(dst_pixels_c);
  751. }
  752. TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) {
  753. align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_);
  754. align_buffer_page_end(dst_pixels_opt, benchmark_width_ * benchmark_height_);
  755. align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_);
  756. MemRandomize(src_pixels, benchmark_width_ * benchmark_height_);
  757. MaskCpuFlags(disable_cpu_flags_);
  758. MirrorPlane(src_pixels, benchmark_width_, dst_pixels_c, benchmark_width_,
  759. benchmark_width_, benchmark_height_);
  760. MaskCpuFlags(benchmark_cpu_info_);
  761. for (int i = 0; i < benchmark_iterations_; ++i) {
  762. MirrorPlane(src_pixels, benchmark_width_, dst_pixels_opt, benchmark_width_,
  763. benchmark_width_, benchmark_height_);
  764. }
  765. for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
  766. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  767. }
  768. free_aligned_buffer_page_end(src_pixels);
  769. free_aligned_buffer_page_end(dst_pixels_opt);
  770. free_aligned_buffer_page_end(dst_pixels_c);
  771. }
  772. TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) {
  773. align_buffer_page_end(src_pixels, benchmark_width_ * benchmark_height_ * 2);
  774. align_buffer_page_end(dst_pixels_opt,
  775. benchmark_width_ * benchmark_height_ * 2);
  776. align_buffer_page_end(dst_pixels_c, benchmark_width_ * benchmark_height_ * 2);
  777. MemRandomize(src_pixels, benchmark_width_ * benchmark_height_ * 2);
  778. MaskCpuFlags(disable_cpu_flags_);
  779. MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
  780. benchmark_width_ * 2, benchmark_width_, benchmark_height_);
  781. MaskCpuFlags(benchmark_cpu_info_);
  782. for (int i = 0; i < benchmark_iterations_; ++i) {
  783. MirrorUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
  784. benchmark_width_ * 2, benchmark_width_, benchmark_height_);
  785. }
  786. for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) {
  787. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  788. }
  789. free_aligned_buffer_page_end(src_pixels);
  790. free_aligned_buffer_page_end(dst_pixels_opt);
  791. free_aligned_buffer_page_end(dst_pixels_c);
  792. }
  793. TEST_F(LibYUVPlanarTest, TestShade) {
  794. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  795. SIMD_ALIGNED(uint8_t shade_pixels[1280][4]);
  796. memset(orig_pixels, 0, sizeof(orig_pixels));
  797. orig_pixels[0][0] = 10u;
  798. orig_pixels[0][1] = 20u;
  799. orig_pixels[0][2] = 40u;
  800. orig_pixels[0][3] = 80u;
  801. orig_pixels[1][0] = 0u;
  802. orig_pixels[1][1] = 0u;
  803. orig_pixels[1][2] = 0u;
  804. orig_pixels[1][3] = 255u;
  805. orig_pixels[2][0] = 0u;
  806. orig_pixels[2][1] = 0u;
  807. orig_pixels[2][2] = 0u;
  808. orig_pixels[2][3] = 0u;
  809. orig_pixels[3][0] = 0u;
  810. orig_pixels[3][1] = 0u;
  811. orig_pixels[3][2] = 0u;
  812. orig_pixels[3][3] = 0u;
  813. // Do 8 pixels to allow opt version to be used.
  814. ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  815. EXPECT_EQ(10u, shade_pixels[0][0]);
  816. EXPECT_EQ(20u, shade_pixels[0][1]);
  817. EXPECT_EQ(40u, shade_pixels[0][2]);
  818. EXPECT_EQ(40u, shade_pixels[0][3]);
  819. EXPECT_EQ(0u, shade_pixels[1][0]);
  820. EXPECT_EQ(0u, shade_pixels[1][1]);
  821. EXPECT_EQ(0u, shade_pixels[1][2]);
  822. EXPECT_EQ(128u, shade_pixels[1][3]);
  823. EXPECT_EQ(0u, shade_pixels[2][0]);
  824. EXPECT_EQ(0u, shade_pixels[2][1]);
  825. EXPECT_EQ(0u, shade_pixels[2][2]);
  826. EXPECT_EQ(0u, shade_pixels[2][3]);
  827. EXPECT_EQ(0u, shade_pixels[3][0]);
  828. EXPECT_EQ(0u, shade_pixels[3][1]);
  829. EXPECT_EQ(0u, shade_pixels[3][2]);
  830. EXPECT_EQ(0u, shade_pixels[3][3]);
  831. ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  832. EXPECT_EQ(5u, shade_pixels[0][0]);
  833. EXPECT_EQ(10u, shade_pixels[0][1]);
  834. EXPECT_EQ(20u, shade_pixels[0][2]);
  835. EXPECT_EQ(40u, shade_pixels[0][3]);
  836. ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  837. EXPECT_EQ(5u, shade_pixels[0][0]);
  838. EXPECT_EQ(5u, shade_pixels[0][1]);
  839. EXPECT_EQ(5u, shade_pixels[0][2]);
  840. EXPECT_EQ(5u, shade_pixels[0][3]);
  841. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  842. ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1,
  843. 0x80808080);
  844. }
  845. }
  846. TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
  847. SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  848. SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]);
  849. SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]);
  850. memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  851. memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
  852. orig_pixels_0[0][0] = 16u;
  853. orig_pixels_0[0][1] = 32u;
  854. orig_pixels_0[0][2] = 64u;
  855. orig_pixels_0[0][3] = 128u;
  856. orig_pixels_0[1][0] = 0u;
  857. orig_pixels_0[1][1] = 0u;
  858. orig_pixels_0[1][2] = 0u;
  859. orig_pixels_0[1][3] = 255u;
  860. orig_pixels_0[2][0] = 0u;
  861. orig_pixels_0[2][1] = 0u;
  862. orig_pixels_0[2][2] = 0u;
  863. orig_pixels_0[2][3] = 0u;
  864. orig_pixels_0[3][0] = 0u;
  865. orig_pixels_0[3][1] = 0u;
  866. orig_pixels_0[3][2] = 0u;
  867. orig_pixels_0[3][3] = 0u;
  868. orig_pixels_1[0][0] = 0u;
  869. orig_pixels_1[0][1] = 0u;
  870. orig_pixels_1[0][2] = 0u;
  871. orig_pixels_1[0][3] = 0u;
  872. orig_pixels_1[1][0] = 0u;
  873. orig_pixels_1[1][1] = 0u;
  874. orig_pixels_1[1][2] = 0u;
  875. orig_pixels_1[1][3] = 0u;
  876. orig_pixels_1[2][0] = 0u;
  877. orig_pixels_1[2][1] = 0u;
  878. orig_pixels_1[2][2] = 0u;
  879. orig_pixels_1[2][3] = 0u;
  880. orig_pixels_1[3][0] = 255u;
  881. orig_pixels_1[3][1] = 255u;
  882. orig_pixels_1[3][2] = 255u;
  883. orig_pixels_1[3][3] = 255u;
  884. ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
  885. &interpolate_pixels[0][0], 0, 4, 1, 128);
  886. EXPECT_EQ(8u, interpolate_pixels[0][0]);
  887. EXPECT_EQ(16u, interpolate_pixels[0][1]);
  888. EXPECT_EQ(32u, interpolate_pixels[0][2]);
  889. EXPECT_EQ(64u, interpolate_pixels[0][3]);
  890. EXPECT_EQ(0u, interpolate_pixels[1][0]);
  891. EXPECT_EQ(0u, interpolate_pixels[1][1]);
  892. EXPECT_EQ(0u, interpolate_pixels[1][2]);
  893. EXPECT_EQ(128u, interpolate_pixels[1][3]);
  894. EXPECT_EQ(0u, interpolate_pixels[2][0]);
  895. EXPECT_EQ(0u, interpolate_pixels[2][1]);
  896. EXPECT_EQ(0u, interpolate_pixels[2][2]);
  897. EXPECT_EQ(0u, interpolate_pixels[2][3]);
  898. EXPECT_EQ(128u, interpolate_pixels[3][0]);
  899. EXPECT_EQ(128u, interpolate_pixels[3][1]);
  900. EXPECT_EQ(128u, interpolate_pixels[3][2]);
  901. EXPECT_EQ(128u, interpolate_pixels[3][3]);
  902. ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
  903. &interpolate_pixels[0][0], 0, 4, 1, 0);
  904. EXPECT_EQ(16u, interpolate_pixels[0][0]);
  905. EXPECT_EQ(32u, interpolate_pixels[0][1]);
  906. EXPECT_EQ(64u, interpolate_pixels[0][2]);
  907. EXPECT_EQ(128u, interpolate_pixels[0][3]);
  908. ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
  909. &interpolate_pixels[0][0], 0, 4, 1, 192);
  910. EXPECT_EQ(4u, interpolate_pixels[0][0]);
  911. EXPECT_EQ(8u, interpolate_pixels[0][1]);
  912. EXPECT_EQ(16u, interpolate_pixels[0][2]);
  913. EXPECT_EQ(32u, interpolate_pixels[0][3]);
  914. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  915. ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
  916. &interpolate_pixels[0][0], 0, 1280, 1, 128);
  917. }
  918. }
  919. TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
  920. SIMD_ALIGNED(uint8_t orig_pixels_0[1280]);
  921. SIMD_ALIGNED(uint8_t orig_pixels_1[1280]);
  922. SIMD_ALIGNED(uint8_t interpolate_pixels[1280]);
  923. memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  924. memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
  925. orig_pixels_0[0] = 16u;
  926. orig_pixels_0[1] = 32u;
  927. orig_pixels_0[2] = 64u;
  928. orig_pixels_0[3] = 128u;
  929. orig_pixels_0[4] = 0u;
  930. orig_pixels_0[5] = 0u;
  931. orig_pixels_0[6] = 0u;
  932. orig_pixels_0[7] = 255u;
  933. orig_pixels_0[8] = 0u;
  934. orig_pixels_0[9] = 0u;
  935. orig_pixels_0[10] = 0u;
  936. orig_pixels_0[11] = 0u;
  937. orig_pixels_0[12] = 0u;
  938. orig_pixels_0[13] = 0u;
  939. orig_pixels_0[14] = 0u;
  940. orig_pixels_0[15] = 0u;
  941. orig_pixels_1[0] = 0u;
  942. orig_pixels_1[1] = 0u;
  943. orig_pixels_1[2] = 0u;
  944. orig_pixels_1[3] = 0u;
  945. orig_pixels_1[4] = 0u;
  946. orig_pixels_1[5] = 0u;
  947. orig_pixels_1[6] = 0u;
  948. orig_pixels_1[7] = 0u;
  949. orig_pixels_1[8] = 0u;
  950. orig_pixels_1[9] = 0u;
  951. orig_pixels_1[10] = 0u;
  952. orig_pixels_1[11] = 0u;
  953. orig_pixels_1[12] = 255u;
  954. orig_pixels_1[13] = 255u;
  955. orig_pixels_1[14] = 255u;
  956. orig_pixels_1[15] = 255u;
  957. InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
  958. &interpolate_pixels[0], 0, 16, 1, 128);
  959. EXPECT_EQ(8u, interpolate_pixels[0]);
  960. EXPECT_EQ(16u, interpolate_pixels[1]);
  961. EXPECT_EQ(32u, interpolate_pixels[2]);
  962. EXPECT_EQ(64u, interpolate_pixels[3]);
  963. EXPECT_EQ(0u, interpolate_pixels[4]);
  964. EXPECT_EQ(0u, interpolate_pixels[5]);
  965. EXPECT_EQ(0u, interpolate_pixels[6]);
  966. EXPECT_EQ(128u, interpolate_pixels[7]);
  967. EXPECT_EQ(0u, interpolate_pixels[8]);
  968. EXPECT_EQ(0u, interpolate_pixels[9]);
  969. EXPECT_EQ(0u, interpolate_pixels[10]);
  970. EXPECT_EQ(0u, interpolate_pixels[11]);
  971. EXPECT_EQ(128u, interpolate_pixels[12]);
  972. EXPECT_EQ(128u, interpolate_pixels[13]);
  973. EXPECT_EQ(128u, interpolate_pixels[14]);
  974. EXPECT_EQ(128u, interpolate_pixels[15]);
  975. InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
  976. &interpolate_pixels[0], 0, 16, 1, 0);
  977. EXPECT_EQ(16u, interpolate_pixels[0]);
  978. EXPECT_EQ(32u, interpolate_pixels[1]);
  979. EXPECT_EQ(64u, interpolate_pixels[2]);
  980. EXPECT_EQ(128u, interpolate_pixels[3]);
  981. InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
  982. &interpolate_pixels[0], 0, 16, 1, 192);
  983. EXPECT_EQ(4u, interpolate_pixels[0]);
  984. EXPECT_EQ(8u, interpolate_pixels[1]);
  985. EXPECT_EQ(16u, interpolate_pixels[2]);
  986. EXPECT_EQ(32u, interpolate_pixels[3]);
  987. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  988. InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
  989. &interpolate_pixels[0], 0, 1280, 1, 123);
  990. }
  991. }
  992. #define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
  993. N, NEG, OFF) \
  994. TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \
  995. const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
  996. const int kHeight = benchmark_height_; \
  997. const int kStrideA = \
  998. (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
  999. const int kStrideB = \
  1000. (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
  1001. align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \
  1002. align_buffer_page_end(src_argb_b, kStrideA* kHeight + OFF); \
  1003. align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \
  1004. align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \
  1005. for (int i = 0; i < kStrideA * kHeight; ++i) { \
  1006. src_argb_a[i + OFF] = (fastrand() & 0xff); \
  1007. src_argb_b[i + OFF] = (fastrand() & 0xff); \
  1008. } \
  1009. MaskCpuFlags(disable_cpu_flags_); \
  1010. ARGBInterpolate(src_argb_a + OFF, kStrideA, src_argb_b + OFF, kStrideA, \
  1011. dst_argb_c, kStrideB, kWidth, NEG kHeight, TERP); \
  1012. MaskCpuFlags(benchmark_cpu_info_); \
  1013. for (int i = 0; i < benchmark_iterations_; ++i) { \
  1014. ARGBInterpolate(src_argb_a + OFF, kStrideA, src_argb_b + OFF, kStrideA, \
  1015. dst_argb_opt, kStrideB, kWidth, NEG kHeight, TERP); \
  1016. } \
  1017. for (int i = 0; i < kStrideB * kHeight; ++i) { \
  1018. EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
  1019. } \
  1020. free_aligned_buffer_page_end(src_argb_a); \
  1021. free_aligned_buffer_page_end(src_argb_b); \
  1022. free_aligned_buffer_page_end(dst_argb_c); \
  1023. free_aligned_buffer_page_end(dst_argb_opt); \
  1024. }
  1025. #define TESTINTERPOLATE(TERP) \
  1026. TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0) \
  1027. TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
  1028. TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \
  1029. TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
  1030. TESTINTERPOLATE(0)
  1031. TESTINTERPOLATE(64)
  1032. TESTINTERPOLATE(128)
  1033. TESTINTERPOLATE(192)
  1034. TESTINTERPOLATE(255)
  1035. static int TestBlend(int width,
  1036. int height,
  1037. int benchmark_iterations,
  1038. int disable_cpu_flags,
  1039. int benchmark_cpu_info,
  1040. int invert,
  1041. int off,
  1042. int attenuate) {
  1043. if (width < 1) {
  1044. width = 1;
  1045. }
  1046. const int kBpp = 4;
  1047. const int kStride = width * kBpp;
  1048. align_buffer_page_end(src_argb_a, kStride * height + off);
  1049. align_buffer_page_end(src_argb_b, kStride * height + off);
  1050. align_buffer_page_end(dst_argb_c, kStride * height);
  1051. align_buffer_page_end(dst_argb_opt, kStride * height);
  1052. for (int i = 0; i < kStride * height; ++i) {
  1053. src_argb_a[i + off] = (fastrand() & 0xff);
  1054. src_argb_b[i + off] = (fastrand() & 0xff);
  1055. }
  1056. MemRandomize(src_argb_a, kStride * height + off);
  1057. MemRandomize(src_argb_b, kStride * height + off);
  1058. if (attenuate) {
  1059. ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
  1060. height);
  1061. }
  1062. memset(dst_argb_c, 255, kStride * height);
  1063. memset(dst_argb_opt, 255, kStride * height);
  1064. MaskCpuFlags(disable_cpu_flags);
  1065. ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
  1066. kStride, width, invert * height);
  1067. MaskCpuFlags(benchmark_cpu_info);
  1068. for (int i = 0; i < benchmark_iterations; ++i) {
  1069. ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
  1070. dst_argb_opt, kStride, width, invert * height);
  1071. }
  1072. int max_diff = 0;
  1073. for (int i = 0; i < kStride * height; ++i) {
  1074. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1075. static_cast<int>(dst_argb_opt[i]));
  1076. if (abs_diff > max_diff) {
  1077. max_diff = abs_diff;
  1078. }
  1079. }
  1080. free_aligned_buffer_page_end(src_argb_a);
  1081. free_aligned_buffer_page_end(src_argb_b);
  1082. free_aligned_buffer_page_end(dst_argb_c);
  1083. free_aligned_buffer_page_end(dst_argb_opt);
  1084. return max_diff;
  1085. }
  1086. TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  1087. int max_diff =
  1088. TestBlend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
  1089. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
  1090. EXPECT_LE(max_diff, 1);
  1091. }
  1092. TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  1093. int max_diff =
  1094. TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1095. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1);
  1096. EXPECT_LE(max_diff, 1);
  1097. }
  1098. TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  1099. int max_diff =
  1100. TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1101. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1);
  1102. EXPECT_LE(max_diff, 1);
  1103. }
  1104. TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
  1105. int max_diff =
  1106. TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1107. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 0);
  1108. EXPECT_LE(max_diff, 1);
  1109. }
  1110. TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  1111. int max_diff =
  1112. TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1113. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
  1114. EXPECT_LE(max_diff, 1);
  1115. }
  1116. static void TestBlendPlane(int width,
  1117. int height,
  1118. int benchmark_iterations,
  1119. int disable_cpu_flags,
  1120. int benchmark_cpu_info,
  1121. int invert,
  1122. int off) {
  1123. if (width < 1) {
  1124. width = 1;
  1125. }
  1126. const int kBpp = 1;
  1127. const int kStride = width * kBpp;
  1128. align_buffer_page_end(src_argb_a, kStride * height + off);
  1129. align_buffer_page_end(src_argb_b, kStride * height + off);
  1130. align_buffer_page_end(src_argb_alpha, kStride * height + off);
  1131. align_buffer_page_end(dst_argb_c, kStride * height + off);
  1132. align_buffer_page_end(dst_argb_opt, kStride * height + off);
  1133. memset(dst_argb_c, 255, kStride * height + off);
  1134. memset(dst_argb_opt, 255, kStride * height + off);
  1135. // Test source is maintained exactly if alpha is 255.
  1136. for (int i = 0; i < width; ++i) {
  1137. src_argb_a[i + off] = i & 255;
  1138. src_argb_b[i + off] = 255 - (i & 255);
  1139. }
  1140. memset(src_argb_alpha + off, 255, width);
  1141. BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
  1142. src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
  1143. for (int i = 0; i < width; ++i) {
  1144. EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
  1145. }
  1146. // Test destination is maintained exactly if alpha is 0.
  1147. memset(src_argb_alpha + off, 0, width);
  1148. BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
  1149. src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
  1150. for (int i = 0; i < width; ++i) {
  1151. EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
  1152. }
  1153. for (int i = 0; i < kStride * height; ++i) {
  1154. src_argb_a[i + off] = (fastrand() & 0xff);
  1155. src_argb_b[i + off] = (fastrand() & 0xff);
  1156. src_argb_alpha[i + off] = (fastrand() & 0xff);
  1157. }
  1158. MaskCpuFlags(disable_cpu_flags);
  1159. BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
  1160. src_argb_alpha + off, width, dst_argb_c + off, width, width,
  1161. invert * height);
  1162. MaskCpuFlags(benchmark_cpu_info);
  1163. for (int i = 0; i < benchmark_iterations; ++i) {
  1164. BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
  1165. src_argb_alpha + off, width, dst_argb_opt + off, width, width,
  1166. invert * height);
  1167. }
  1168. for (int i = 0; i < kStride * height; ++i) {
  1169. EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
  1170. }
  1171. free_aligned_buffer_page_end(src_argb_a);
  1172. free_aligned_buffer_page_end(src_argb_b);
  1173. free_aligned_buffer_page_end(src_argb_alpha);
  1174. free_aligned_buffer_page_end(dst_argb_c);
  1175. free_aligned_buffer_page_end(dst_argb_opt);
  1176. }
  1177. TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  1178. TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1179. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1180. }
  1181. TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  1182. TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1183. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1184. }
  1185. TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  1186. TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
  1187. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1188. }
  1189. TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  1190. TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1191. disable_cpu_flags_, benchmark_cpu_info_, -1, 1);
  1192. }
  // Divides v by a, rounding up for non-negative v and positive a.
  #define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
  1194. static void TestI420Blend(int width,
  1195. int height,
  1196. int benchmark_iterations,
  1197. int disable_cpu_flags,
  1198. int benchmark_cpu_info,
  1199. int invert,
  1200. int off) {
  1201. width = ((width) > 0) ? (width) : 1;
  1202. const int kStrideUV = SUBSAMPLE(width, 2);
  1203. const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
  1204. align_buffer_page_end(src_y0, width * height + off);
  1205. align_buffer_page_end(src_u0, kSizeUV + off);
  1206. align_buffer_page_end(src_v0, kSizeUV + off);
  1207. align_buffer_page_end(src_y1, width * height + off);
  1208. align_buffer_page_end(src_u1, kSizeUV + off);
  1209. align_buffer_page_end(src_v1, kSizeUV + off);
  1210. align_buffer_page_end(src_a, width * height + off);
  1211. align_buffer_page_end(dst_y_c, width * height + off);
  1212. align_buffer_page_end(dst_u_c, kSizeUV + off);
  1213. align_buffer_page_end(dst_v_c, kSizeUV + off);
  1214. align_buffer_page_end(dst_y_opt, width * height + off);
  1215. align_buffer_page_end(dst_u_opt, kSizeUV + off);
  1216. align_buffer_page_end(dst_v_opt, kSizeUV + off);
  1217. MemRandomize(src_y0, width * height + off);
  1218. MemRandomize(src_u0, kSizeUV + off);
  1219. MemRandomize(src_v0, kSizeUV + off);
  1220. MemRandomize(src_y1, width * height + off);
  1221. MemRandomize(src_u1, kSizeUV + off);
  1222. MemRandomize(src_v1, kSizeUV + off);
  1223. MemRandomize(src_a, width * height + off);
  1224. memset(dst_y_c, 255, width * height + off);
  1225. memset(dst_u_c, 255, kSizeUV + off);
  1226. memset(dst_v_c, 255, kSizeUV + off);
  1227. memset(dst_y_opt, 255, width * height + off);
  1228. memset(dst_u_opt, 255, kSizeUV + off);
  1229. memset(dst_v_opt, 255, kSizeUV + off);
  1230. MaskCpuFlags(disable_cpu_flags);
  1231. I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
  1232. kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
  1233. src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
  1234. dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
  1235. invert * height);
  1236. MaskCpuFlags(benchmark_cpu_info);
  1237. for (int i = 0; i < benchmark_iterations; ++i) {
  1238. I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
  1239. kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
  1240. src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
  1241. width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
  1242. width, invert * height);
  1243. }
  1244. for (int i = 0; i < width * height; ++i) {
  1245. EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
  1246. }
  1247. for (int i = 0; i < kSizeUV; ++i) {
  1248. EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
  1249. EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
  1250. }
  1251. free_aligned_buffer_page_end(src_y0);
  1252. free_aligned_buffer_page_end(src_u0);
  1253. free_aligned_buffer_page_end(src_v0);
  1254. free_aligned_buffer_page_end(src_y1);
  1255. free_aligned_buffer_page_end(src_u1);
  1256. free_aligned_buffer_page_end(src_v1);
  1257. free_aligned_buffer_page_end(src_a);
  1258. free_aligned_buffer_page_end(dst_y_c);
  1259. free_aligned_buffer_page_end(dst_u_c);
  1260. free_aligned_buffer_page_end(dst_v_c);
  1261. free_aligned_buffer_page_end(dst_y_opt);
  1262. free_aligned_buffer_page_end(dst_u_opt);
  1263. free_aligned_buffer_page_end(dst_v_opt);
  1264. }
  1265. TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  1266. TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1267. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1268. }
  1269. TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  1270. TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1271. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1272. }
  1273. // TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable.
  1274. TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
  1275. TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
  1276. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1277. }
  1278. TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  1279. TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1280. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1281. }
  1282. TEST_F(LibYUVPlanarTest, TestAffine) {
  1283. SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  1284. SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);
  1285. for (int i = 0; i < 1280; ++i) {
  1286. for (int j = 0; j < 4; ++j) {
  1287. orig_pixels_0[i][j] = i;
  1288. }
  1289. }
  1290. float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};
  1291. ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
  1292. 1280);
  1293. EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  1294. EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  1295. EXPECT_EQ(191u, interpolate_pixels_C[255][3]);
  1296. #if defined(HAS_ARGBAFFINEROW_SSE2)
  1297. SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
  1298. ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
  1299. uv_step, 1280);
  1300. EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));
  1301. int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  1302. if (has_sse2) {
  1303. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  1304. ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
  1305. uv_step, 1280);
  1306. }
  1307. }
  1308. #endif
  1309. }
  1310. TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  1311. int err = 0;
  1312. int yw = benchmark_width_;
  1313. int yh = benchmark_height_;
  1314. int b = 12;
  1315. int i, j;
  1316. int y_plane_size = (yw + b * 2) * (yh + b * 2);
  1317. align_buffer_page_end(orig_y, y_plane_size);
  1318. align_buffer_page_end(dst_c, y_plane_size);
  1319. align_buffer_page_end(dst_opt, y_plane_size);
  1320. memset(orig_y, 0, y_plane_size);
  1321. memset(dst_c, 0, y_plane_size);
  1322. memset(dst_opt, 0, y_plane_size);
  1323. // Fill image buffers with random data.
  1324. for (i = b; i < (yh + b); ++i) {
  1325. for (j = b; j < (yw + b); ++j) {
  1326. orig_y[i * (yw + b * 2) + j] = fastrand() & 0xff;
  1327. }
  1328. }
  1329. // Fill destination buffers with random data.
  1330. for (i = 0; i < y_plane_size; ++i) {
  1331. uint8_t random_number = fastrand() & 0x7f;
  1332. dst_c[i] = random_number;
  1333. dst_opt[i] = dst_c[i];
  1334. }
  1335. int y_off = b * (yw + b * 2) + b;
  1336. int y_st = yw + b * 2;
  1337. int stride = 8;
  1338. // Disable all optimizations.
  1339. MaskCpuFlags(disable_cpu_flags_);
  1340. for (j = 0; j < benchmark_iterations_; j++) {
  1341. CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh);
  1342. }
  1343. // Enable optimizations.
  1344. MaskCpuFlags(benchmark_cpu_info_);
  1345. for (j = 0; j < benchmark_iterations_; j++) {
  1346. CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh);
  1347. }
  1348. for (i = 0; i < y_plane_size; ++i) {
  1349. if (dst_c[i] != dst_opt[i]) {
  1350. ++err;
  1351. }
  1352. }
  1353. free_aligned_buffer_page_end(orig_y);
  1354. free_aligned_buffer_page_end(dst_c);
  1355. free_aligned_buffer_page_end(dst_opt);
  1356. EXPECT_EQ(0, err);
  1357. }
  1358. static int TestMultiply(int width,
  1359. int height,
  1360. int benchmark_iterations,
  1361. int disable_cpu_flags,
  1362. int benchmark_cpu_info,
  1363. int invert,
  1364. int off) {
  1365. if (width < 1) {
  1366. width = 1;
  1367. }
  1368. const int kBpp = 4;
  1369. const int kStride = width * kBpp;
  1370. align_buffer_page_end(src_argb_a, kStride * height + off);
  1371. align_buffer_page_end(src_argb_b, kStride * height + off);
  1372. align_buffer_page_end(dst_argb_c, kStride * height);
  1373. align_buffer_page_end(dst_argb_opt, kStride * height);
  1374. for (int i = 0; i < kStride * height; ++i) {
  1375. src_argb_a[i + off] = (fastrand() & 0xff);
  1376. src_argb_b[i + off] = (fastrand() & 0xff);
  1377. }
  1378. memset(dst_argb_c, 0, kStride * height);
  1379. memset(dst_argb_opt, 0, kStride * height);
  1380. MaskCpuFlags(disable_cpu_flags);
  1381. ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
  1382. kStride, width, invert * height);
  1383. MaskCpuFlags(benchmark_cpu_info);
  1384. for (int i = 0; i < benchmark_iterations; ++i) {
  1385. ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
  1386. dst_argb_opt, kStride, width, invert * height);
  1387. }
  1388. int max_diff = 0;
  1389. for (int i = 0; i < kStride * height; ++i) {
  1390. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1391. static_cast<int>(dst_argb_opt[i]));
  1392. if (abs_diff > max_diff) {
  1393. max_diff = abs_diff;
  1394. }
  1395. }
  1396. free_aligned_buffer_page_end(src_argb_a);
  1397. free_aligned_buffer_page_end(src_argb_b);
  1398. free_aligned_buffer_page_end(dst_argb_c);
  1399. free_aligned_buffer_page_end(dst_argb_opt);
  1400. return max_diff;
  1401. }
  1402. TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  1403. int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_,
  1404. benchmark_iterations_, disable_cpu_flags_,
  1405. benchmark_cpu_info_, +1, 0);
  1406. EXPECT_LE(max_diff, 1);
  1407. }
  1408. TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  1409. int max_diff =
  1410. TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1411. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1412. EXPECT_LE(max_diff, 1);
  1413. }
  1414. TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  1415. int max_diff =
  1416. TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1417. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1418. EXPECT_LE(max_diff, 1);
  1419. }
  1420. TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  1421. int max_diff =
  1422. TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1423. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1424. EXPECT_LE(max_diff, 1);
  1425. }
  1426. static int TestAdd(int width,
  1427. int height,
  1428. int benchmark_iterations,
  1429. int disable_cpu_flags,
  1430. int benchmark_cpu_info,
  1431. int invert,
  1432. int off) {
  1433. if (width < 1) {
  1434. width = 1;
  1435. }
  1436. const int kBpp = 4;
  1437. const int kStride = width * kBpp;
  1438. align_buffer_page_end(src_argb_a, kStride * height + off);
  1439. align_buffer_page_end(src_argb_b, kStride * height + off);
  1440. align_buffer_page_end(dst_argb_c, kStride * height);
  1441. align_buffer_page_end(dst_argb_opt, kStride * height);
  1442. for (int i = 0; i < kStride * height; ++i) {
  1443. src_argb_a[i + off] = (fastrand() & 0xff);
  1444. src_argb_b[i + off] = (fastrand() & 0xff);
  1445. }
  1446. memset(dst_argb_c, 0, kStride * height);
  1447. memset(dst_argb_opt, 0, kStride * height);
  1448. MaskCpuFlags(disable_cpu_flags);
  1449. ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
  1450. kStride, width, invert * height);
  1451. MaskCpuFlags(benchmark_cpu_info);
  1452. for (int i = 0; i < benchmark_iterations; ++i) {
  1453. ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
  1454. kStride, width, invert * height);
  1455. }
  1456. int max_diff = 0;
  1457. for (int i = 0; i < kStride * height; ++i) {
  1458. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1459. static_cast<int>(dst_argb_opt[i]));
  1460. if (abs_diff > max_diff) {
  1461. max_diff = abs_diff;
  1462. }
  1463. }
  1464. free_aligned_buffer_page_end(src_argb_a);
  1465. free_aligned_buffer_page_end(src_argb_b);
  1466. free_aligned_buffer_page_end(dst_argb_c);
  1467. free_aligned_buffer_page_end(dst_argb_opt);
  1468. return max_diff;
  1469. }
  1470. TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  1471. int max_diff =
  1472. TestAdd(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
  1473. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1474. EXPECT_LE(max_diff, 1);
  1475. }
  1476. TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  1477. int max_diff =
  1478. TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1479. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1480. EXPECT_LE(max_diff, 1);
  1481. }
  1482. TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  1483. int max_diff =
  1484. TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1485. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1486. EXPECT_LE(max_diff, 1);
  1487. }
  1488. TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  1489. int max_diff =
  1490. TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1491. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1492. EXPECT_LE(max_diff, 1);
  1493. }
  1494. static int TestSubtract(int width,
  1495. int height,
  1496. int benchmark_iterations,
  1497. int disable_cpu_flags,
  1498. int benchmark_cpu_info,
  1499. int invert,
  1500. int off) {
  1501. if (width < 1) {
  1502. width = 1;
  1503. }
  1504. const int kBpp = 4;
  1505. const int kStride = width * kBpp;
  1506. align_buffer_page_end(src_argb_a, kStride * height + off);
  1507. align_buffer_page_end(src_argb_b, kStride * height + off);
  1508. align_buffer_page_end(dst_argb_c, kStride * height);
  1509. align_buffer_page_end(dst_argb_opt, kStride * height);
  1510. for (int i = 0; i < kStride * height; ++i) {
  1511. src_argb_a[i + off] = (fastrand() & 0xff);
  1512. src_argb_b[i + off] = (fastrand() & 0xff);
  1513. }
  1514. memset(dst_argb_c, 0, kStride * height);
  1515. memset(dst_argb_opt, 0, kStride * height);
  1516. MaskCpuFlags(disable_cpu_flags);
  1517. ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
  1518. kStride, width, invert * height);
  1519. MaskCpuFlags(benchmark_cpu_info);
  1520. for (int i = 0; i < benchmark_iterations; ++i) {
  1521. ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
  1522. dst_argb_opt, kStride, width, invert * height);
  1523. }
  1524. int max_diff = 0;
  1525. for (int i = 0; i < kStride * height; ++i) {
  1526. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1527. static_cast<int>(dst_argb_opt[i]));
  1528. if (abs_diff > max_diff) {
  1529. max_diff = abs_diff;
  1530. }
  1531. }
  1532. free_aligned_buffer_page_end(src_argb_a);
  1533. free_aligned_buffer_page_end(src_argb_b);
  1534. free_aligned_buffer_page_end(dst_argb_c);
  1535. free_aligned_buffer_page_end(dst_argb_opt);
  1536. return max_diff;
  1537. }
  1538. TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  1539. int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_,
  1540. benchmark_iterations_, disable_cpu_flags_,
  1541. benchmark_cpu_info_, +1, 0);
  1542. EXPECT_LE(max_diff, 1);
  1543. }
  1544. TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  1545. int max_diff =
  1546. TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1547. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1548. EXPECT_LE(max_diff, 1);
  1549. }
  1550. TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  1551. int max_diff =
  1552. TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1553. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1554. EXPECT_LE(max_diff, 1);
  1555. }
  1556. TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  1557. int max_diff =
  1558. TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1559. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1560. EXPECT_LE(max_diff, 1);
  1561. }
  1562. static int TestSobel(int width,
  1563. int height,
  1564. int benchmark_iterations,
  1565. int disable_cpu_flags,
  1566. int benchmark_cpu_info,
  1567. int invert,
  1568. int off) {
  1569. if (width < 1) {
  1570. width = 1;
  1571. }
  1572. const int kBpp = 4;
  1573. const int kStride = width * kBpp;
  1574. align_buffer_page_end(src_argb_a, kStride * height + off);
  1575. align_buffer_page_end(dst_argb_c, kStride * height);
  1576. align_buffer_page_end(dst_argb_opt, kStride * height);
  1577. memset(src_argb_a, 0, kStride * height + off);
  1578. for (int i = 0; i < kStride * height; ++i) {
  1579. src_argb_a[i + off] = (fastrand() & 0xff);
  1580. }
  1581. memset(dst_argb_c, 0, kStride * height);
  1582. memset(dst_argb_opt, 0, kStride * height);
  1583. MaskCpuFlags(disable_cpu_flags);
  1584. ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
  1585. invert * height);
  1586. MaskCpuFlags(benchmark_cpu_info);
  1587. for (int i = 0; i < benchmark_iterations; ++i) {
  1588. ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
  1589. invert * height);
  1590. }
  1591. int max_diff = 0;
  1592. for (int i = 0; i < kStride * height; ++i) {
  1593. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1594. static_cast<int>(dst_argb_opt[i]));
  1595. if (abs_diff > max_diff) {
  1596. max_diff = abs_diff;
  1597. }
  1598. }
  1599. free_aligned_buffer_page_end(src_argb_a);
  1600. free_aligned_buffer_page_end(dst_argb_c);
  1601. free_aligned_buffer_page_end(dst_argb_opt);
  1602. return max_diff;
  1603. }
  1604. TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  1605. int max_diff =
  1606. TestSobel(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
  1607. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1608. EXPECT_EQ(0, max_diff);
  1609. }
  1610. TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  1611. int max_diff =
  1612. TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1613. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1614. EXPECT_EQ(0, max_diff);
  1615. }
  1616. TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  1617. int max_diff =
  1618. TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1619. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1620. EXPECT_EQ(0, max_diff);
  1621. }
  1622. TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  1623. int max_diff =
  1624. TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1625. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1626. EXPECT_EQ(0, max_diff);
  1627. }
  1628. static int TestSobelToPlane(int width,
  1629. int height,
  1630. int benchmark_iterations,
  1631. int disable_cpu_flags,
  1632. int benchmark_cpu_info,
  1633. int invert,
  1634. int off) {
  1635. if (width < 1) {
  1636. width = 1;
  1637. }
  1638. const int kSrcBpp = 4;
  1639. const int kDstBpp = 1;
  1640. const int kSrcStride = (width * kSrcBpp + 15) & ~15;
  1641. const int kDstStride = (width * kDstBpp + 15) & ~15;
  1642. align_buffer_page_end(src_argb_a, kSrcStride * height + off);
  1643. align_buffer_page_end(dst_argb_c, kDstStride * height);
  1644. align_buffer_page_end(dst_argb_opt, kDstStride * height);
  1645. memset(src_argb_a, 0, kSrcStride * height + off);
  1646. for (int i = 0; i < kSrcStride * height; ++i) {
  1647. src_argb_a[i + off] = (fastrand() & 0xff);
  1648. }
  1649. memset(dst_argb_c, 0, kDstStride * height);
  1650. memset(dst_argb_opt, 0, kDstStride * height);
  1651. MaskCpuFlags(disable_cpu_flags);
  1652. ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
  1653. invert * height);
  1654. MaskCpuFlags(benchmark_cpu_info);
  1655. for (int i = 0; i < benchmark_iterations; ++i) {
  1656. ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
  1657. width, invert * height);
  1658. }
  1659. int max_diff = 0;
  1660. for (int i = 0; i < kDstStride * height; ++i) {
  1661. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1662. static_cast<int>(dst_argb_opt[i]));
  1663. if (abs_diff > max_diff) {
  1664. max_diff = abs_diff;
  1665. }
  1666. }
  1667. free_aligned_buffer_page_end(src_argb_a);
  1668. free_aligned_buffer_page_end(dst_argb_c);
  1669. free_aligned_buffer_page_end(dst_argb_opt);
  1670. return max_diff;
  1671. }
  1672. TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  1673. int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_,
  1674. benchmark_iterations_, disable_cpu_flags_,
  1675. benchmark_cpu_info_, +1, 0);
  1676. EXPECT_EQ(0, max_diff);
  1677. }
  1678. TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  1679. int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_,
  1680. benchmark_iterations_, disable_cpu_flags_,
  1681. benchmark_cpu_info_, +1, 1);
  1682. EXPECT_EQ(0, max_diff);
  1683. }
  1684. TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  1685. int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_,
  1686. benchmark_iterations_, disable_cpu_flags_,
  1687. benchmark_cpu_info_, -1, 0);
  1688. EXPECT_EQ(0, max_diff);
  1689. }
  1690. TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  1691. int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_,
  1692. benchmark_iterations_, disable_cpu_flags_,
  1693. benchmark_cpu_info_, +1, 0);
  1694. EXPECT_EQ(0, max_diff);
  1695. }
  1696. static int TestSobelXY(int width,
  1697. int height,
  1698. int benchmark_iterations,
  1699. int disable_cpu_flags,
  1700. int benchmark_cpu_info,
  1701. int invert,
  1702. int off) {
  1703. if (width < 1) {
  1704. width = 1;
  1705. }
  1706. const int kBpp = 4;
  1707. const int kStride = width * kBpp;
  1708. align_buffer_page_end(src_argb_a, kStride * height + off);
  1709. align_buffer_page_end(dst_argb_c, kStride * height);
  1710. align_buffer_page_end(dst_argb_opt, kStride * height);
  1711. memset(src_argb_a, 0, kStride * height + off);
  1712. for (int i = 0; i < kStride * height; ++i) {
  1713. src_argb_a[i + off] = (fastrand() & 0xff);
  1714. }
  1715. memset(dst_argb_c, 0, kStride * height);
  1716. memset(dst_argb_opt, 0, kStride * height);
  1717. MaskCpuFlags(disable_cpu_flags);
  1718. ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
  1719. invert * height);
  1720. MaskCpuFlags(benchmark_cpu_info);
  1721. for (int i = 0; i < benchmark_iterations; ++i) {
  1722. ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
  1723. invert * height);
  1724. }
  1725. int max_diff = 0;
  1726. for (int i = 0; i < kStride * height; ++i) {
  1727. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1728. static_cast<int>(dst_argb_opt[i]));
  1729. if (abs_diff > max_diff) {
  1730. max_diff = abs_diff;
  1731. }
  1732. }
  1733. free_aligned_buffer_page_end(src_argb_a);
  1734. free_aligned_buffer_page_end(dst_argb_c);
  1735. free_aligned_buffer_page_end(dst_argb_opt);
  1736. return max_diff;
  1737. }
  1738. TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  1739. int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_,
  1740. benchmark_iterations_, disable_cpu_flags_,
  1741. benchmark_cpu_info_, +1, 0);
  1742. EXPECT_EQ(0, max_diff);
  1743. }
  1744. TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  1745. int max_diff =
  1746. TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1747. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1748. EXPECT_EQ(0, max_diff);
  1749. }
  1750. TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  1751. int max_diff =
  1752. TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1753. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1754. EXPECT_EQ(0, max_diff);
  1755. }
  1756. TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  1757. int max_diff =
  1758. TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1759. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1760. EXPECT_EQ(0, max_diff);
  1761. }
  1762. static int TestBlur(int width,
  1763. int height,
  1764. int benchmark_iterations,
  1765. int disable_cpu_flags,
  1766. int benchmark_cpu_info,
  1767. int invert,
  1768. int off,
  1769. int radius) {
  1770. if (width < 1) {
  1771. width = 1;
  1772. }
  1773. const int kBpp = 4;
  1774. const int kStride = width * kBpp;
  1775. align_buffer_page_end(src_argb_a, kStride * height + off);
  1776. align_buffer_page_end(dst_cumsum, width * height * 16);
  1777. align_buffer_page_end(dst_argb_c, kStride * height);
  1778. align_buffer_page_end(dst_argb_opt, kStride * height);
  1779. for (int i = 0; i < kStride * height; ++i) {
  1780. src_argb_a[i + off] = (fastrand() & 0xff);
  1781. }
  1782. memset(dst_cumsum, 0, width * height * 16);
  1783. memset(dst_argb_c, 0, kStride * height);
  1784. memset(dst_argb_opt, 0, kStride * height);
  1785. MaskCpuFlags(disable_cpu_flags);
  1786. ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
  1787. reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
  1788. invert * height, radius);
  1789. MaskCpuFlags(benchmark_cpu_info);
  1790. for (int i = 0; i < benchmark_iterations; ++i) {
  1791. ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
  1792. reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
  1793. invert * height, radius);
  1794. }
  1795. int max_diff = 0;
  1796. for (int i = 0; i < kStride * height; ++i) {
  1797. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1798. static_cast<int>(dst_argb_opt[i]));
  1799. if (abs_diff > max_diff) {
  1800. max_diff = abs_diff;
  1801. }
  1802. }
  1803. free_aligned_buffer_page_end(src_argb_a);
  1804. free_aligned_buffer_page_end(dst_cumsum);
  1805. free_aligned_buffer_page_end(dst_argb_c);
  1806. free_aligned_buffer_page_end(dst_argb_opt);
  1807. return max_diff;
  1808. }
  1809. static const int kBlurSize = 55;
  1810. TEST_F(LibYUVPlanarTest, ARGBBlur_Any) {
  1811. int max_diff =
  1812. TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
  1813. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
  1814. EXPECT_LE(max_diff, 1);
  1815. }
  1816. TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) {
  1817. int max_diff =
  1818. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1819. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSize);
  1820. EXPECT_LE(max_diff, 1);
  1821. }
  1822. TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) {
  1823. int max_diff =
  1824. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1825. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSize);
  1826. EXPECT_LE(max_diff, 1);
  1827. }
  1828. TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
  1829. int max_diff =
  1830. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1831. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
  1832. EXPECT_LE(max_diff, 1);
  1833. }
  1834. static const int kBlurSmallSize = 5;
  1835. TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) {
  1836. int max_diff =
  1837. TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
  1838. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
  1839. EXPECT_LE(max_diff, 1);
  1840. }
  1841. TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) {
  1842. int max_diff =
  1843. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1844. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSmallSize);
  1845. EXPECT_LE(max_diff, 1);
  1846. }
  1847. TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) {
  1848. int max_diff =
  1849. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1850. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSmallSize);
  1851. EXPECT_LE(max_diff, 1);
  1852. }
  1853. TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
  1854. int max_diff =
  1855. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1856. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
  1857. EXPECT_LE(max_diff, 1);
  1858. }
  1859. TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
  1860. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  1861. SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  1862. SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  1863. memset(orig_pixels, 0, sizeof(orig_pixels));
  1864. SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
  1865. 0.94230f, -3.03300f, -2.92500f, 0.f, // C0
  1866. 0.584500f, 1.112000f, 1.535000f, 1.f, // C1 x
  1867. 0.001313f, -0.002503f, -0.004496f, 0.f, // C2 x * x
  1868. 0.0f, 0.000006965f, 0.000008781f, 0.f, // C3 x * x * x
  1869. };
  1870. // Test blue
  1871. orig_pixels[0][0] = 255u;
  1872. orig_pixels[0][1] = 0u;
  1873. orig_pixels[0][2] = 0u;
  1874. orig_pixels[0][3] = 128u;
  1875. // Test green
  1876. orig_pixels[1][0] = 0u;
  1877. orig_pixels[1][1] = 255u;
  1878. orig_pixels[1][2] = 0u;
  1879. orig_pixels[1][3] = 0u;
  1880. // Test red
  1881. orig_pixels[2][0] = 0u;
  1882. orig_pixels[2][1] = 0u;
  1883. orig_pixels[2][2] = 255u;
  1884. orig_pixels[2][3] = 255u;
  1885. // Test white
  1886. orig_pixels[3][0] = 255u;
  1887. orig_pixels[3][1] = 255u;
  1888. orig_pixels[3][2] = 255u;
  1889. orig_pixels[3][3] = 255u;
  1890. // Test color
  1891. orig_pixels[4][0] = 16u;
  1892. orig_pixels[4][1] = 64u;
  1893. orig_pixels[4][2] = 192u;
  1894. orig_pixels[4][3] = 224u;
  1895. // Do 16 to test asm version.
  1896. ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  1897. &kWarmifyPolynomial[0], 16, 1);
  1898. EXPECT_EQ(235u, dst_pixels_opt[0][0]);
  1899. EXPECT_EQ(0u, dst_pixels_opt[0][1]);
  1900. EXPECT_EQ(0u, dst_pixels_opt[0][2]);
  1901. EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  1902. EXPECT_EQ(0u, dst_pixels_opt[1][0]);
  1903. EXPECT_EQ(233u, dst_pixels_opt[1][1]);
  1904. EXPECT_EQ(0u, dst_pixels_opt[1][2]);
  1905. EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  1906. EXPECT_EQ(0u, dst_pixels_opt[2][0]);
  1907. EXPECT_EQ(0u, dst_pixels_opt[2][1]);
  1908. EXPECT_EQ(241u, dst_pixels_opt[2][2]);
  1909. EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  1910. EXPECT_EQ(235u, dst_pixels_opt[3][0]);
  1911. EXPECT_EQ(233u, dst_pixels_opt[3][1]);
  1912. EXPECT_EQ(241u, dst_pixels_opt[3][2]);
  1913. EXPECT_EQ(255u, dst_pixels_opt[3][3]);
  1914. EXPECT_EQ(10u, dst_pixels_opt[4][0]);
  1915. EXPECT_EQ(59u, dst_pixels_opt[4][1]);
  1916. EXPECT_EQ(188u, dst_pixels_opt[4][2]);
  1917. EXPECT_EQ(224u, dst_pixels_opt[4][3]);
  1918. for (int i = 0; i < 1280; ++i) {
  1919. orig_pixels[i][0] = i;
  1920. orig_pixels[i][1] = i / 2;
  1921. orig_pixels[i][2] = i / 3;
  1922. orig_pixels[i][3] = i;
  1923. }
  1924. MaskCpuFlags(disable_cpu_flags_);
  1925. ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
  1926. &kWarmifyPolynomial[0], 1280, 1);
  1927. MaskCpuFlags(benchmark_cpu_info_);
  1928. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  1929. ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  1930. &kWarmifyPolynomial[0], 1280, 1);
  1931. }
  1932. for (int i = 0; i < 1280; ++i) {
  1933. EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
  1934. EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
  1935. EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
  1936. EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  1937. }
  1938. }
  1939. int TestHalfFloatPlane(int benchmark_width,
  1940. int benchmark_height,
  1941. int benchmark_iterations,
  1942. int disable_cpu_flags,
  1943. int benchmark_cpu_info,
  1944. float scale,
  1945. int mask) {
  1946. int i, j;
  1947. const int y_plane_size = benchmark_width * benchmark_height * 2;
  1948. align_buffer_page_end(orig_y, y_plane_size * 3);
  1949. uint8_t* dst_opt = orig_y + y_plane_size;
  1950. uint8_t* dst_c = orig_y + y_plane_size * 2;
  1951. MemRandomize(orig_y, y_plane_size);
  1952. memset(dst_c, 0, y_plane_size);
  1953. memset(dst_opt, 1, y_plane_size);
  1954. for (i = 0; i < y_plane_size / 2; ++i) {
  1955. reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
  1956. }
  1957. // Disable all optimizations.
  1958. MaskCpuFlags(disable_cpu_flags);
  1959. for (j = 0; j < benchmark_iterations; j++) {
  1960. HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
  1961. reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
  1962. scale, benchmark_width, benchmark_height);
  1963. }
  1964. // Enable optimizations.
  1965. MaskCpuFlags(benchmark_cpu_info);
  1966. for (j = 0; j < benchmark_iterations; j++) {
  1967. HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
  1968. reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
  1969. scale, benchmark_width, benchmark_height);
  1970. }
  1971. int max_diff = 0;
  1972. for (i = 0; i < y_plane_size / 2; ++i) {
  1973. int abs_diff =
  1974. abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
  1975. static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
  1976. if (abs_diff > max_diff) {
  1977. max_diff = abs_diff;
  1978. }
  1979. }
  1980. free_aligned_buffer_page_end(orig_y);
  1981. return max_diff;
  1982. }
  1983. #if defined(__arm__)
  1984. static void EnableFlushDenormalToZero(void) {
  1985. uint32_t cw;
  1986. __asm__ __volatile__(
  1987. "vmrs %0, fpscr \n"
  1988. "orr %0, %0, #0x1000000 \n"
  1989. "vmsr fpscr, %0 \n"
  1990. : "=r"(cw)::"memory");
  1991. }
  1992. #endif
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
  1996. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
  1997. // 32 bit arm rounding on denormal case is off by 1 compared to C.
  1998. #if defined(__arm__)
  1999. EnableFlushDenormalToZero();
  2000. #endif
  2001. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2002. benchmark_iterations_, disable_cpu_flags_,
  2003. benchmark_cpu_info_, 1.0f / 65536.0f, 65535);
  2004. EXPECT_EQ(0, diff);
  2005. }
  2006. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  2007. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2008. benchmark_iterations_, disable_cpu_flags_,
  2009. benchmark_cpu_info_, 1.0f, 65535);
  2010. EXPECT_LE(diff, 1);
  2011. }
  2012. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  2013. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2014. benchmark_iterations_, disable_cpu_flags_,
  2015. benchmark_cpu_info_, 1.0f / 4096.0f, 65535);
  2016. EXPECT_EQ(0, diff);
  2017. }
  2018. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  2019. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2020. benchmark_iterations_, disable_cpu_flags_,
  2021. benchmark_cpu_info_, 1.0f / 1024.0f, 1023);
  2022. EXPECT_EQ(0, diff);
  2023. }
  2024. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  2025. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2026. benchmark_iterations_, disable_cpu_flags_,
  2027. benchmark_cpu_info_, 1.0f / 512.0f, 511);
  2028. EXPECT_EQ(0, diff);
  2029. }
  2030. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  2031. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2032. benchmark_iterations_, disable_cpu_flags_,
  2033. benchmark_cpu_info_, 1.0f / 4096.0f, 4095);
  2034. EXPECT_EQ(0, diff);
  2035. }
  2036. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  2037. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2038. benchmark_iterations_, disable_cpu_flags_,
  2039. benchmark_cpu_info_, 1.0f / 4095.0f, 4095);
  2040. EXPECT_EQ(0, diff);
  2041. }
  2042. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  2043. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2044. benchmark_iterations_, disable_cpu_flags_,
  2045. benchmark_cpu_info_, 1.0f, 2047);
  2046. EXPECT_EQ(0, diff);
  2047. }
  2048. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  2049. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  2050. benchmark_iterations_, disable_cpu_flags_,
  2051. benchmark_cpu_info_, 1.0f, 4095);
  2052. EXPECT_LE(diff, 1);
  2053. }
  2054. float TestByteToFloat(int benchmark_width,
  2055. int benchmark_height,
  2056. int benchmark_iterations,
  2057. int disable_cpu_flags,
  2058. int benchmark_cpu_info,
  2059. float scale) {
  2060. int i, j;
  2061. const int y_plane_size = benchmark_width * benchmark_height;
  2062. align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
  2063. float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
  2064. float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
  2065. MemRandomize(orig_y, y_plane_size);
  2066. memset(dst_c, 0, y_plane_size * 4);
  2067. memset(dst_opt, 1, y_plane_size * 4);
  2068. // Disable all optimizations.
  2069. MaskCpuFlags(disable_cpu_flags);
  2070. ByteToFloat(orig_y, dst_c, scale, y_plane_size);
  2071. // Enable optimizations.
  2072. MaskCpuFlags(benchmark_cpu_info);
  2073. for (j = 0; j < benchmark_iterations; j++) {
  2074. ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
  2075. }
  2076. float max_diff = 0;
  2077. for (i = 0; i < y_plane_size; ++i) {
  2078. float abs_diff = fabs(dst_c[i] - dst_opt[i]);
  2079. if (abs_diff > max_diff) {
  2080. max_diff = abs_diff;
  2081. }
  2082. }
  2083. free_aligned_buffer_page_end(orig_y);
  2084. return max_diff;
  2085. }
  2086. TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  2087. float diff = TestByteToFloat(benchmark_width_, benchmark_height_,
  2088. benchmark_iterations_, disable_cpu_flags_,
  2089. benchmark_cpu_info_, 1.0f);
  2090. EXPECT_EQ(0.f, diff);
  2091. }
  2092. TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
  2093. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  2094. SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  2095. SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  2096. memset(orig_pixels, 0, sizeof(orig_pixels));
  2097. align_buffer_page_end(lumacolortable, 32768);
  2098. int v = 0;
  2099. for (int i = 0; i < 32768; ++i) {
  2100. lumacolortable[i] = v;
  2101. v += 3;
  2102. }
  2103. // Test blue
  2104. orig_pixels[0][0] = 255u;
  2105. orig_pixels[0][1] = 0u;
  2106. orig_pixels[0][2] = 0u;
  2107. orig_pixels[0][3] = 128u;
  2108. // Test green
  2109. orig_pixels[1][0] = 0u;
  2110. orig_pixels[1][1] = 255u;
  2111. orig_pixels[1][2] = 0u;
  2112. orig_pixels[1][3] = 0u;
  2113. // Test red
  2114. orig_pixels[2][0] = 0u;
  2115. orig_pixels[2][1] = 0u;
  2116. orig_pixels[2][2] = 255u;
  2117. orig_pixels[2][3] = 255u;
  2118. // Test color
  2119. orig_pixels[3][0] = 16u;
  2120. orig_pixels[3][1] = 64u;
  2121. orig_pixels[3][2] = 192u;
  2122. orig_pixels[3][3] = 224u;
  2123. // Do 16 to test asm version.
  2124. ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  2125. &lumacolortable[0], 16, 1);
  2126. EXPECT_EQ(253u, dst_pixels_opt[0][0]);
  2127. EXPECT_EQ(0u, dst_pixels_opt[0][1]);
  2128. EXPECT_EQ(0u, dst_pixels_opt[0][2]);
  2129. EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  2130. EXPECT_EQ(0u, dst_pixels_opt[1][0]);
  2131. EXPECT_EQ(253u, dst_pixels_opt[1][1]);
  2132. EXPECT_EQ(0u, dst_pixels_opt[1][2]);
  2133. EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  2134. EXPECT_EQ(0u, dst_pixels_opt[2][0]);
  2135. EXPECT_EQ(0u, dst_pixels_opt[2][1]);
  2136. EXPECT_EQ(253u, dst_pixels_opt[2][2]);
  2137. EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  2138. EXPECT_EQ(48u, dst_pixels_opt[3][0]);
  2139. EXPECT_EQ(192u, dst_pixels_opt[3][1]);
  2140. EXPECT_EQ(64u, dst_pixels_opt[3][2]);
  2141. EXPECT_EQ(224u, dst_pixels_opt[3][3]);
  2142. for (int i = 0; i < 1280; ++i) {
  2143. orig_pixels[i][0] = i;
  2144. orig_pixels[i][1] = i / 2;
  2145. orig_pixels[i][2] = i / 3;
  2146. orig_pixels[i][3] = i;
  2147. }
  2148. MaskCpuFlags(disable_cpu_flags_);
  2149. ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
  2150. lumacolortable, 1280, 1);
  2151. MaskCpuFlags(benchmark_cpu_info_);
  2152. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  2153. ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  2154. lumacolortable, 1280, 1);
  2155. }
  2156. for (int i = 0; i < 1280; ++i) {
  2157. EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
  2158. EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
  2159. EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
  2160. EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  2161. }
  2162. free_aligned_buffer_page_end(lumacolortable);
  2163. }
  2164. TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  2165. const int kSize = benchmark_width_ * benchmark_height_ * 4;
  2166. align_buffer_page_end(orig_pixels, kSize);
  2167. align_buffer_page_end(dst_pixels_opt, kSize);
  2168. align_buffer_page_end(dst_pixels_c, kSize);
  2169. MemRandomize(orig_pixels, kSize);
  2170. MemRandomize(dst_pixels_opt, kSize);
  2171. memcpy(dst_pixels_c, dst_pixels_opt, kSize);
  2172. MaskCpuFlags(disable_cpu_flags_);
  2173. ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, dst_pixels_c,
  2174. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  2175. MaskCpuFlags(benchmark_cpu_info_);
  2176. for (int i = 0; i < benchmark_iterations_; ++i) {
  2177. ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, dst_pixels_opt,
  2178. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  2179. }
  2180. for (int i = 0; i < kSize; ++i) {
  2181. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2182. }
  2183. free_aligned_buffer_page_end(dst_pixels_c);
  2184. free_aligned_buffer_page_end(dst_pixels_opt);
  2185. free_aligned_buffer_page_end(orig_pixels);
  2186. }
  2187. TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  2188. // Round count up to multiple of 16
  2189. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2190. align_buffer_page_end(src_pixels, kPixels * 4);
  2191. align_buffer_page_end(dst_pixels_opt, kPixels);
  2192. align_buffer_page_end(dst_pixels_c, kPixels);
  2193. MemRandomize(src_pixels, kPixels * 4);
  2194. MemRandomize(dst_pixels_opt, kPixels);
  2195. memcpy(dst_pixels_c, dst_pixels_opt, kPixels);
  2196. MaskCpuFlags(disable_cpu_flags_);
  2197. ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c,
  2198. benchmark_width_, benchmark_width_, benchmark_height_);
  2199. MaskCpuFlags(benchmark_cpu_info_);
  2200. for (int i = 0; i < benchmark_iterations_; ++i) {
  2201. ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
  2202. benchmark_width_, benchmark_width_, benchmark_height_);
  2203. }
  2204. for (int i = 0; i < kPixels; ++i) {
  2205. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2206. }
  2207. free_aligned_buffer_page_end(dst_pixels_c);
  2208. free_aligned_buffer_page_end(dst_pixels_opt);
  2209. free_aligned_buffer_page_end(src_pixels);
  2210. }
  2211. TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  2212. // Round count up to multiple of 16
  2213. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2214. align_buffer_page_end(orig_pixels, kPixels);
  2215. align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  2216. align_buffer_page_end(dst_pixels_c, kPixels * 4);
  2217. MemRandomize(orig_pixels, kPixels);
  2218. MemRandomize(dst_pixels_opt, kPixels * 4);
  2219. memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4);
  2220. MaskCpuFlags(disable_cpu_flags_);
  2221. ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c,
  2222. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  2223. MaskCpuFlags(benchmark_cpu_info_);
  2224. for (int i = 0; i < benchmark_iterations_; ++i) {
  2225. ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
  2226. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  2227. }
  2228. for (int i = 0; i < kPixels * 4; ++i) {
  2229. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2230. }
  2231. free_aligned_buffer_page_end(dst_pixels_c);
  2232. free_aligned_buffer_page_end(dst_pixels_opt);
  2233. free_aligned_buffer_page_end(orig_pixels);
  2234. }
  2235. static int TestARGBRect(int width,
  2236. int height,
  2237. int benchmark_iterations,
  2238. int disable_cpu_flags,
  2239. int benchmark_cpu_info,
  2240. int invert,
  2241. int off,
  2242. int bpp) {
  2243. if (width < 1) {
  2244. width = 1;
  2245. }
  2246. const int kStride = width * bpp;
  2247. const int kSize = kStride * height;
  2248. const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
  2249. align_buffer_page_end(dst_argb_c, kSize + off);
  2250. align_buffer_page_end(dst_argb_opt, kSize + off);
  2251. MemRandomize(dst_argb_c + off, kSize);
  2252. memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
  2253. MaskCpuFlags(disable_cpu_flags);
  2254. if (bpp == 4) {
  2255. ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
  2256. } else {
  2257. SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
  2258. }
  2259. MaskCpuFlags(benchmark_cpu_info);
  2260. for (int i = 0; i < benchmark_iterations; ++i) {
  2261. if (bpp == 4) {
  2262. ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
  2263. } else {
  2264. SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
  2265. }
  2266. }
  2267. int max_diff = 0;
  2268. for (int i = 0; i < kStride * height; ++i) {
  2269. int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
  2270. static_cast<int>(dst_argb_opt[i + off]));
  2271. if (abs_diff > max_diff) {
  2272. max_diff = abs_diff;
  2273. }
  2274. }
  2275. free_aligned_buffer_page_end(dst_argb_c);
  2276. free_aligned_buffer_page_end(dst_argb_opt);
  2277. return max_diff;
  2278. }
  2279. TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  2280. int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
  2281. benchmark_iterations_, disable_cpu_flags_,
  2282. benchmark_cpu_info_, +1, 0, 4);
  2283. EXPECT_EQ(0, max_diff);
  2284. }
  2285. TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  2286. int max_diff =
  2287. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2288. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 4);
  2289. EXPECT_EQ(0, max_diff);
  2290. }
  2291. TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  2292. int max_diff =
  2293. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2294. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 4);
  2295. EXPECT_EQ(0, max_diff);
  2296. }
  2297. TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  2298. int max_diff =
  2299. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2300. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 4);
  2301. EXPECT_EQ(0, max_diff);
  2302. }
  2303. TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  2304. int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
  2305. benchmark_iterations_, disable_cpu_flags_,
  2306. benchmark_cpu_info_, +1, 0, 1);
  2307. EXPECT_EQ(0, max_diff);
  2308. }
  2309. TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  2310. int max_diff =
  2311. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2312. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1);
  2313. EXPECT_EQ(0, max_diff);
  2314. }
  2315. TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  2316. int max_diff =
  2317. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2318. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1);
  2319. EXPECT_EQ(0, max_diff);
  2320. }
  2321. TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  2322. int max_diff =
  2323. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2324. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
  2325. EXPECT_EQ(0, max_diff);
  2326. }
  2327. TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  2328. // Round count up to multiple of 16
  2329. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2330. align_buffer_page_end(src_pixels, kPixels * 2);
  2331. align_buffer_page_end(tmp_pixels_u, kPixels);
  2332. align_buffer_page_end(tmp_pixels_v, kPixels);
  2333. align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  2334. align_buffer_page_end(dst_pixels_c, kPixels * 2);
  2335. MemRandomize(src_pixels, kPixels * 2);
  2336. MemRandomize(tmp_pixels_u, kPixels);
  2337. MemRandomize(tmp_pixels_v, kPixels);
  2338. MemRandomize(dst_pixels_opt, kPixels * 2);
  2339. MemRandomize(dst_pixels_c, kPixels * 2);
  2340. MaskCpuFlags(disable_cpu_flags_);
  2341. SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
  2342. tmp_pixels_v, benchmark_width_, benchmark_width_,
  2343. benchmark_height_);
  2344. MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
  2345. dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
  2346. benchmark_height_);
  2347. MaskCpuFlags(benchmark_cpu_info_);
  2348. SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
  2349. tmp_pixels_v, benchmark_width_, benchmark_width_,
  2350. benchmark_height_);
  2351. for (int i = 0; i < benchmark_iterations_; ++i) {
  2352. MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
  2353. dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
  2354. benchmark_height_);
  2355. }
  2356. for (int i = 0; i < kPixels * 2; ++i) {
  2357. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2358. }
  2359. free_aligned_buffer_page_end(src_pixels);
  2360. free_aligned_buffer_page_end(tmp_pixels_u);
  2361. free_aligned_buffer_page_end(tmp_pixels_v);
  2362. free_aligned_buffer_page_end(dst_pixels_opt);
  2363. free_aligned_buffer_page_end(dst_pixels_c);
  2364. }
  2365. TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  2366. // Round count up to multiple of 16
  2367. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2368. align_buffer_page_end(src_pixels, kPixels * 2);
  2369. align_buffer_page_end(tmp_pixels_u, kPixels);
  2370. align_buffer_page_end(tmp_pixels_v, kPixels);
  2371. align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  2372. align_buffer_page_end(dst_pixels_c, kPixels * 2);
  2373. MemRandomize(src_pixels, kPixels * 2);
  2374. MemRandomize(tmp_pixels_u, kPixels);
  2375. MemRandomize(tmp_pixels_v, kPixels);
  2376. MemRandomize(dst_pixels_opt, kPixels * 2);
  2377. MemRandomize(dst_pixels_c, kPixels * 2);
  2378. MaskCpuFlags(disable_cpu_flags_);
  2379. SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
  2380. tmp_pixels_v, benchmark_width_, benchmark_width_,
  2381. benchmark_height_);
  2382. MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
  2383. dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
  2384. benchmark_height_);
  2385. MaskCpuFlags(benchmark_cpu_info_);
  2386. for (int i = 0; i < benchmark_iterations_; ++i) {
  2387. SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u,
  2388. benchmark_width_, tmp_pixels_v, benchmark_width_,
  2389. benchmark_width_, benchmark_height_);
  2390. }
  2391. MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
  2392. dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
  2393. benchmark_height_);
  2394. for (int i = 0; i < kPixels * 2; ++i) {
  2395. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2396. }
  2397. free_aligned_buffer_page_end(src_pixels);
  2398. free_aligned_buffer_page_end(tmp_pixels_u);
  2399. free_aligned_buffer_page_end(tmp_pixels_v);
  2400. free_aligned_buffer_page_end(dst_pixels_opt);
  2401. free_aligned_buffer_page_end(dst_pixels_c);
  2402. }
  2403. TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
  2404. // Round count up to multiple of 16
  2405. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2406. align_buffer_page_end(src_pixels, kPixels * 2);
  2407. align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  2408. align_buffer_page_end(dst_pixels_c, kPixels * 2);
  2409. MemRandomize(src_pixels, kPixels * 2);
  2410. MemRandomize(dst_pixels_opt, kPixels * 2);
  2411. MemRandomize(dst_pixels_c, kPixels * 2);
  2412. MaskCpuFlags(disable_cpu_flags_);
  2413. SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
  2414. benchmark_width_ * 2, benchmark_width_, benchmark_height_);
  2415. MaskCpuFlags(benchmark_cpu_info_);
  2416. for (int i = 0; i < benchmark_iterations_; ++i) {
  2417. SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
  2418. benchmark_width_ * 2, benchmark_width_, benchmark_height_);
  2419. }
  2420. for (int i = 0; i < kPixels * 2; ++i) {
  2421. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2422. }
  2423. free_aligned_buffer_page_end(src_pixels);
  2424. free_aligned_buffer_page_end(dst_pixels_opt);
  2425. free_aligned_buffer_page_end(dst_pixels_c);
  2426. }
  2427. TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  2428. // Round count up to multiple of 16
  2429. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2430. align_buffer_page_end(src_pixels, kPixels * 3);
  2431. align_buffer_page_end(tmp_pixels_r, kPixels);
  2432. align_buffer_page_end(tmp_pixels_g, kPixels);
  2433. align_buffer_page_end(tmp_pixels_b, kPixels);
  2434. align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  2435. align_buffer_page_end(dst_pixels_c, kPixels * 3);
  2436. MemRandomize(src_pixels, kPixels * 3);
  2437. MemRandomize(tmp_pixels_r, kPixels);
  2438. MemRandomize(tmp_pixels_g, kPixels);
  2439. MemRandomize(tmp_pixels_b, kPixels);
  2440. MemRandomize(dst_pixels_opt, kPixels * 3);
  2441. MemRandomize(dst_pixels_c, kPixels * 3);
  2442. MaskCpuFlags(disable_cpu_flags_);
  2443. SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
  2444. benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
  2445. benchmark_width_, benchmark_width_, benchmark_height_);
  2446. MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
  2447. tmp_pixels_b, benchmark_width_, dst_pixels_c,
  2448. benchmark_width_ * 3, benchmark_width_, benchmark_height_);
  2449. MaskCpuFlags(benchmark_cpu_info_);
  2450. SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
  2451. benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
  2452. benchmark_width_, benchmark_width_, benchmark_height_);
  2453. for (int i = 0; i < benchmark_iterations_; ++i) {
  2454. MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g,
  2455. benchmark_width_, tmp_pixels_b, benchmark_width_,
  2456. dst_pixels_opt, benchmark_width_ * 3, benchmark_width_,
  2457. benchmark_height_);
  2458. }
  2459. for (int i = 0; i < kPixels * 3; ++i) {
  2460. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2461. }
  2462. free_aligned_buffer_page_end(src_pixels);
  2463. free_aligned_buffer_page_end(tmp_pixels_r);
  2464. free_aligned_buffer_page_end(tmp_pixels_g);
  2465. free_aligned_buffer_page_end(tmp_pixels_b);
  2466. free_aligned_buffer_page_end(dst_pixels_opt);
  2467. free_aligned_buffer_page_end(dst_pixels_c);
  2468. }
  2469. TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  2470. // Round count up to multiple of 16
  2471. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2472. align_buffer_page_end(src_pixels, kPixels * 3);
  2473. align_buffer_page_end(tmp_pixels_r, kPixels);
  2474. align_buffer_page_end(tmp_pixels_g, kPixels);
  2475. align_buffer_page_end(tmp_pixels_b, kPixels);
  2476. align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  2477. align_buffer_page_end(dst_pixels_c, kPixels * 3);
  2478. MemRandomize(src_pixels, kPixels * 3);
  2479. MemRandomize(tmp_pixels_r, kPixels);
  2480. MemRandomize(tmp_pixels_g, kPixels);
  2481. MemRandomize(tmp_pixels_b, kPixels);
  2482. MemRandomize(dst_pixels_opt, kPixels * 3);
  2483. MemRandomize(dst_pixels_c, kPixels * 3);
  2484. MaskCpuFlags(disable_cpu_flags_);
  2485. SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
  2486. benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
  2487. benchmark_width_, benchmark_width_, benchmark_height_);
  2488. MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
  2489. tmp_pixels_b, benchmark_width_, dst_pixels_c,
  2490. benchmark_width_ * 3, benchmark_width_, benchmark_height_);
  2491. MaskCpuFlags(benchmark_cpu_info_);
  2492. for (int i = 0; i < benchmark_iterations_; ++i) {
  2493. SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
  2494. benchmark_width_, tmp_pixels_g, benchmark_width_,
  2495. tmp_pixels_b, benchmark_width_, benchmark_width_,
  2496. benchmark_height_);
  2497. }
  2498. MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
  2499. tmp_pixels_b, benchmark_width_, dst_pixels_opt,
  2500. benchmark_width_ * 3, benchmark_width_, benchmark_height_);
  2501. for (int i = 0; i < kPixels * 3; ++i) {
  2502. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2503. }
  2504. free_aligned_buffer_page_end(src_pixels);
  2505. free_aligned_buffer_page_end(tmp_pixels_r);
  2506. free_aligned_buffer_page_end(tmp_pixels_g);
  2507. free_aligned_buffer_page_end(tmp_pixels_b);
  2508. free_aligned_buffer_page_end(dst_pixels_opt);
  2509. free_aligned_buffer_page_end(dst_pixels_c);
  2510. }
  2511. // TODO(fbarchard): improve test for platforms and cpu detect
  2512. #ifdef HAS_MERGEUVROW_16_AVX2
  2513. TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  2514. // Round count up to multiple of 16
  2515. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2516. align_buffer_page_end(src_pixels_u, kPixels * 2);
  2517. align_buffer_page_end(src_pixels_v, kPixels * 2);
  2518. align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
  2519. align_buffer_page_end(dst_pixels_uv_c, kPixels * 2 * 2);
  2520. MemRandomize(src_pixels_u, kPixels * 2);
  2521. MemRandomize(src_pixels_v, kPixels * 2);
  2522. memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2);
  2523. memset(dst_pixels_uv_c, 1, kPixels * 2 * 2);
  2524. MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
  2525. reinterpret_cast<const uint16_t*>(src_pixels_v),
  2526. reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels);
  2527. int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  2528. for (int i = 0; i < benchmark_iterations_; ++i) {
  2529. if (has_avx2) {
  2530. MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u),
  2531. reinterpret_cast<const uint16_t*>(src_pixels_v),
  2532. reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
  2533. kPixels);
  2534. } else {
  2535. MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
  2536. reinterpret_cast<const uint16_t*>(src_pixels_v),
  2537. reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
  2538. kPixels);
  2539. }
  2540. }
  2541. for (int i = 0; i < kPixels * 2 * 2; ++i) {
  2542. EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
  2543. }
  2544. free_aligned_buffer_page_end(src_pixels_u);
  2545. free_aligned_buffer_page_end(src_pixels_v);
  2546. free_aligned_buffer_page_end(dst_pixels_uv_opt);
  2547. free_aligned_buffer_page_end(dst_pixels_uv_c);
  2548. }
  2549. #endif
  2550. // TODO(fbarchard): Improve test for more platforms.
  2551. #ifdef HAS_MULTIPLYROW_16_AVX2
  2552. TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  2553. // Round count up to multiple of 16
  2554. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2555. align_buffer_page_end(src_pixels_y, kPixels * 2);
  2556. align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  2557. align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
  2558. MemRandomize(src_pixels_y, kPixels * 2);
  2559. memset(dst_pixels_y_opt, 0, kPixels * 2);
  2560. memset(dst_pixels_y_c, 1, kPixels * 2);
  2561. MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2562. reinterpret_cast<uint16_t*>(dst_pixels_y_c), 64, kPixels);
  2563. int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  2564. for (int i = 0; i < benchmark_iterations_; ++i) {
  2565. if (has_avx2) {
  2566. MultiplyRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2567. reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64,
  2568. kPixels);
  2569. } else {
  2570. MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2571. reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64,
  2572. kPixels);
  2573. }
  2574. }
  2575. for (int i = 0; i < kPixels * 2; ++i) {
  2576. EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  2577. }
  2578. free_aligned_buffer_page_end(src_pixels_y);
  2579. free_aligned_buffer_page_end(dst_pixels_y_opt);
  2580. free_aligned_buffer_page_end(dst_pixels_y_c);
  2581. }
  2582. #endif // HAS_MULTIPLYROW_16_AVX2
  2583. TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  2584. // Round count up to multiple of 16
  2585. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2586. align_buffer_page_end(src_pixels_y, kPixels * 2);
  2587. align_buffer_page_end(dst_pixels_y_opt, kPixels);
  2588. align_buffer_page_end(dst_pixels_y_c, kPixels);
  2589. MemRandomize(src_pixels_y, kPixels * 2);
  2590. memset(dst_pixels_y_opt, 0, kPixels);
  2591. memset(dst_pixels_y_c, 1, kPixels);
  2592. MaskCpuFlags(disable_cpu_flags_);
  2593. Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2594. benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384,
  2595. benchmark_width_, benchmark_height_);
  2596. MaskCpuFlags(benchmark_cpu_info_);
  2597. for (int i = 0; i < benchmark_iterations_; ++i) {
  2598. Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2599. benchmark_width_, dst_pixels_y_opt, benchmark_width_,
  2600. 16384, benchmark_width_, benchmark_height_);
  2601. }
  2602. for (int i = 0; i < kPixels; ++i) {
  2603. EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  2604. }
  2605. free_aligned_buffer_page_end(src_pixels_y);
  2606. free_aligned_buffer_page_end(dst_pixels_y_opt);
  2607. free_aligned_buffer_page_end(dst_pixels_y_c);
  2608. }
  2609. #ifdef ENABLE_ROW_TESTS
  2610. // TODO(fbarchard): Improve test for more platforms.
  2611. #ifdef HAS_CONVERT16TO8ROW_AVX2
  2612. TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  2613. // AVX2 does multiple of 32, so round count up
  2614. const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  2615. align_buffer_page_end(src_pixels_y, kPixels * 2);
  2616. align_buffer_page_end(dst_pixels_y_opt, kPixels);
  2617. align_buffer_page_end(dst_pixels_y_c, kPixels);
  2618. MemRandomize(src_pixels_y, kPixels * 2);
  2619. // clamp source range to 10 bits.
  2620. for (int i = 0; i < kPixels; ++i) {
  2621. reinterpret_cast<uint16_t*>(src_pixels_y)[i] &= 1023;
  2622. }
  2623. memset(dst_pixels_y_opt, 0, kPixels);
  2624. memset(dst_pixels_y_c, 1, kPixels);
  2625. Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2626. dst_pixels_y_c, 16384, kPixels);
  2627. int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  2628. int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  2629. for (int i = 0; i < benchmark_iterations_; ++i) {
  2630. if (has_avx2) {
  2631. Convert16To8Row_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2632. dst_pixels_y_opt, 16384, kPixels);
  2633. } else if (has_ssse3) {
  2634. Convert16To8Row_SSSE3(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2635. dst_pixels_y_opt, 16384, kPixels);
  2636. } else {
  2637. Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
  2638. dst_pixels_y_opt, 16384, kPixels);
  2639. }
  2640. }
  2641. for (int i = 0; i < kPixels; ++i) {
  2642. EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  2643. }
  2644. free_aligned_buffer_page_end(src_pixels_y);
  2645. free_aligned_buffer_page_end(dst_pixels_y_opt);
  2646. free_aligned_buffer_page_end(dst_pixels_y_c);
  2647. }
  2648. #endif // HAS_CONVERT16TO8ROW_AVX2
  2649. #endif // ENABLE_ROW_TESTS
  2650. TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  2651. // Round count up to multiple of 16
  2652. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2653. align_buffer_page_end(src_pixels_y, kPixels);
  2654. align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  2655. align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
  2656. MemRandomize(src_pixels_y, kPixels);
  2657. memset(dst_pixels_y_opt, 0, kPixels * 2);
  2658. memset(dst_pixels_y_c, 1, kPixels * 2);
  2659. MaskCpuFlags(disable_cpu_flags_);
  2660. Convert8To16Plane(src_pixels_y, benchmark_width_,
  2661. reinterpret_cast<uint16_t*>(dst_pixels_y_c),
  2662. benchmark_width_, 1024, benchmark_width_,
  2663. benchmark_height_);
  2664. MaskCpuFlags(benchmark_cpu_info_);
  2665. for (int i = 0; i < benchmark_iterations_; ++i) {
  2666. Convert8To16Plane(src_pixels_y, benchmark_width_,
  2667. reinterpret_cast<uint16_t*>(dst_pixels_y_opt),
  2668. benchmark_width_, 1024, benchmark_width_,
  2669. benchmark_height_);
  2670. }
  2671. for (int i = 0; i < kPixels * 2; ++i) {
  2672. EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  2673. }
  2674. free_aligned_buffer_page_end(src_pixels_y);
  2675. free_aligned_buffer_page_end(dst_pixels_y_opt);
  2676. free_aligned_buffer_page_end(dst_pixels_y_c);
  2677. }
  2678. #ifdef ENABLE_ROW_TESTS
  2679. // TODO(fbarchard): Improve test for more platforms.
  2680. #ifdef HAS_CONVERT8TO16ROW_AVX2
  2681. TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  2682. const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  2683. align_buffer_page_end(src_pixels_y, kPixels);
  2684. align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  2685. align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
  2686. MemRandomize(src_pixels_y, kPixels);
  2687. memset(dst_pixels_y_opt, 0, kPixels * 2);
  2688. memset(dst_pixels_y_c, 1, kPixels * 2);
  2689. Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16_t*>(dst_pixels_y_c),
  2690. 1024, kPixels);
  2691. int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  2692. int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  2693. for (int i = 0; i < benchmark_iterations_; ++i) {
  2694. if (has_avx2) {
  2695. Convert8To16Row_AVX2(src_pixels_y,
  2696. reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
  2697. kPixels);
  2698. } else if (has_sse2) {
  2699. Convert8To16Row_SSE2(src_pixels_y,
  2700. reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
  2701. kPixels);
  2702. } else {
  2703. Convert8To16Row_C(src_pixels_y,
  2704. reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
  2705. kPixels);
  2706. }
  2707. }
  2708. for (int i = 0; i < kPixels * 2; ++i) {
  2709. EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  2710. }
  2711. free_aligned_buffer_page_end(src_pixels_y);
  2712. free_aligned_buffer_page_end(dst_pixels_y_opt);
  2713. free_aligned_buffer_page_end(dst_pixels_y_c);
  2714. }
  2715. #endif // HAS_CONVERT8TO16ROW_AVX2
  2716. float TestScaleMaxSamples(int benchmark_width,
  2717. int benchmark_height,
  2718. int benchmark_iterations,
  2719. float scale,
  2720. bool opt) {
  2721. int i, j;
  2722. float max_c, max_opt = 0.f;
  2723. // NEON does multiple of 8, so round count up
  2724. const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
  2725. align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
  2726. uint8_t* dst_c = orig_y + kPixels * 4 + 16;
  2727. uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
  2728. // Randomize works but may contain some denormals affecting performance.
  2729. // MemRandomize(orig_y, kPixels * 4);
  2730. // large values are problematic. audio is really -1 to 1.
  2731. for (i = 0; i < kPixels; ++i) {
  2732. (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
  2733. }
  2734. memset(dst_c, 0, kPixels * 4);
  2735. memset(dst_opt, 1, kPixels * 4);
  2736. max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
  2737. reinterpret_cast<float*>(dst_c), scale, kPixels);
  2738. for (j = 0; j < benchmark_iterations; j++) {
  2739. if (opt) {
  2740. #ifdef HAS_SCALESUMSAMPLES_NEON
  2741. max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
  2742. reinterpret_cast<float*>(dst_opt), scale,
  2743. kPixels);
  2744. #else
  2745. max_opt =
  2746. ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
  2747. reinterpret_cast<float*>(dst_opt), scale, kPixels);
  2748. #endif
  2749. } else {
  2750. max_opt =
  2751. ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
  2752. reinterpret_cast<float*>(dst_opt), scale, kPixels);
  2753. }
  2754. }
  2755. float max_diff = FAbs(max_opt - max_c);
  2756. for (i = 0; i < kPixels; ++i) {
  2757. float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
  2758. (reinterpret_cast<float*>(dst_opt)[i]));
  2759. if (abs_diff > max_diff) {
  2760. max_diff = abs_diff;
  2761. }
  2762. }
  2763. free_aligned_buffer_page_end(orig_y);
  2764. return max_diff;
  2765. }
  2766. TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  2767. float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
  2768. benchmark_iterations_, 1.2f, false);
  2769. EXPECT_EQ(0, diff);
  2770. }
  2771. TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  2772. float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
  2773. benchmark_iterations_, 1.2f, true);
  2774. EXPECT_EQ(0, diff);
  2775. }
  2776. float TestScaleSumSamples(int benchmark_width,
  2777. int benchmark_height,
  2778. int benchmark_iterations,
  2779. float scale,
  2780. bool opt) {
  2781. int i, j;
  2782. float sum_c, sum_opt = 0.f;
  2783. // NEON does multiple of 8, so round count up
  2784. const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
  2785. align_buffer_page_end(orig_y, kPixels * 4 * 3);
  2786. uint8_t* dst_c = orig_y + kPixels * 4;
  2787. uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
  2788. // Randomize works but may contain some denormals affecting performance.
  2789. // MemRandomize(orig_y, kPixels * 4);
  2790. // large values are problematic. audio is really -1 to 1.
  2791. for (i = 0; i < kPixels; ++i) {
  2792. (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
  2793. }
  2794. memset(dst_c, 0, kPixels * 4);
  2795. memset(dst_opt, 1, kPixels * 4);
  2796. sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
  2797. reinterpret_cast<float*>(dst_c), scale, kPixels);
  2798. for (j = 0; j < benchmark_iterations; j++) {
  2799. if (opt) {
  2800. #ifdef HAS_SCALESUMSAMPLES_NEON
  2801. sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
  2802. reinterpret_cast<float*>(dst_opt), scale,
  2803. kPixels);
  2804. #else
  2805. sum_opt =
  2806. ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
  2807. reinterpret_cast<float*>(dst_opt), scale, kPixels);
  2808. #endif
  2809. } else {
  2810. sum_opt =
  2811. ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
  2812. reinterpret_cast<float*>(dst_opt), scale, kPixels);
  2813. }
  2814. }
  2815. float mse_opt = sum_opt / kPixels * 4;
  2816. float mse_c = sum_c / kPixels * 4;
  2817. float mse_error = FAbs(mse_opt - mse_c) / mse_c;
  2818. // If the sum of a float is more than 4 million, small adds are round down on
  2819. // float and produce different results with vectorized sum vs scalar sum.
  2820. // Ignore the difference if the sum is large.
  2821. float max_diff = 0.f;
  2822. if (mse_error > 0.0001 && sum_c < 4000000) { // allow .01% difference of mse
  2823. max_diff = mse_error;
  2824. }
  2825. for (i = 0; i < kPixels; ++i) {
  2826. float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
  2827. (reinterpret_cast<float*>(dst_opt)[i]));
  2828. if (abs_diff > max_diff) {
  2829. max_diff = abs_diff;
  2830. }
  2831. }
  2832. free_aligned_buffer_page_end(orig_y);
  2833. return max_diff;
  2834. }
  2835. TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  2836. float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
  2837. benchmark_iterations_, 1.2f, false);
  2838. EXPECT_EQ(0, diff);
  2839. }
  2840. TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  2841. float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
  2842. benchmark_iterations_, 1.2f, true);
  2843. EXPECT_EQ(0, diff);
  2844. }
  2845. float TestScaleSamples(int benchmark_width,
  2846. int benchmark_height,
  2847. int benchmark_iterations,
  2848. float scale,
  2849. bool opt) {
  2850. int i, j;
  2851. // NEON does multiple of 8, so round count up
  2852. const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
  2853. align_buffer_page_end(orig_y, kPixels * 4 * 3);
  2854. uint8_t* dst_c = orig_y + kPixels * 4;
  2855. uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
  2856. // Randomize works but may contain some denormals affecting performance.
  2857. // MemRandomize(orig_y, kPixels * 4);
  2858. // large values are problematic. audio is really -1 to 1.
  2859. for (i = 0; i < kPixels; ++i) {
  2860. (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
  2861. }
  2862. memset(dst_c, 0, kPixels * 4);
  2863. memset(dst_opt, 1, kPixels * 4);
  2864. ScaleSamples_C(reinterpret_cast<float*>(orig_y),
  2865. reinterpret_cast<float*>(dst_c), scale, kPixels);
  2866. for (j = 0; j < benchmark_iterations; j++) {
  2867. if (opt) {
  2868. #ifdef HAS_SCALESUMSAMPLES_NEON
  2869. ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
  2870. reinterpret_cast<float*>(dst_opt), scale, kPixels);
  2871. #else
  2872. ScaleSamples_C(reinterpret_cast<float*>(orig_y),
  2873. reinterpret_cast<float*>(dst_opt), scale, kPixels);
  2874. #endif
  2875. } else {
  2876. ScaleSamples_C(reinterpret_cast<float*>(orig_y),
  2877. reinterpret_cast<float*>(dst_opt), scale, kPixels);
  2878. }
  2879. }
  2880. float max_diff = 0.f;
  2881. for (i = 0; i < kPixels; ++i) {
  2882. float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
  2883. (reinterpret_cast<float*>(dst_opt)[i]));
  2884. if (abs_diff > max_diff) {
  2885. max_diff = abs_diff;
  2886. }
  2887. }
  2888. free_aligned_buffer_page_end(orig_y);
  2889. return max_diff;
  2890. }
  2891. TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  2892. float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
  2893. benchmark_iterations_, 1.2f, false);
  2894. EXPECT_EQ(0, diff);
  2895. }
  2896. TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  2897. float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
  2898. benchmark_iterations_, 1.2f, true);
  2899. EXPECT_EQ(0, diff);
  2900. }
  2901. float TestCopySamples(int benchmark_width,
  2902. int benchmark_height,
  2903. int benchmark_iterations,
  2904. bool opt) {
  2905. int i, j;
  2906. // NEON does multiple of 16 floats, so round count up
  2907. const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
  2908. align_buffer_page_end(orig_y, kPixels * 4 * 3);
  2909. uint8_t* dst_c = orig_y + kPixels * 4;
  2910. uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
  2911. // Randomize works but may contain some denormals affecting performance.
  2912. // MemRandomize(orig_y, kPixels * 4);
  2913. // large values are problematic. audio is really -1 to 1.
  2914. for (i = 0; i < kPixels; ++i) {
  2915. (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
  2916. }
  2917. memset(dst_c, 0, kPixels * 4);
  2918. memset(dst_opt, 1, kPixels * 4);
  2919. memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
  2920. kPixels * 4);
  2921. for (j = 0; j < benchmark_iterations; j++) {
  2922. if (opt) {
  2923. #ifdef HAS_COPYROW_NEON
  2924. CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
  2925. #else
  2926. CopyRow_C(orig_y, dst_opt, kPixels * 4);
  2927. #endif
  2928. } else {
  2929. CopyRow_C(orig_y, dst_opt, kPixels * 4);
  2930. }
  2931. }
  2932. float max_diff = 0.f;
  2933. for (i = 0; i < kPixels; ++i) {
  2934. float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
  2935. (reinterpret_cast<float*>(dst_opt)[i]));
  2936. if (abs_diff > max_diff) {
  2937. max_diff = abs_diff;
  2938. }
  2939. }
  2940. free_aligned_buffer_page_end(orig_y);
  2941. return max_diff;
  2942. }
  2943. TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  2944. float diff = TestCopySamples(benchmark_width_, benchmark_height_,
  2945. benchmark_iterations_, false);
  2946. EXPECT_EQ(0, diff);
  2947. }
  2948. TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  2949. float diff = TestCopySamples(benchmark_width_, benchmark_height_,
  2950. benchmark_iterations_, true);
  2951. EXPECT_EQ(0, diff);
  2952. }
  2953. extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
  2954. extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
  2955. TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
  2956. SIMD_ALIGNED(uint32_t orig_pixels[1280 + 8]);
  2957. SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
  2958. SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
  2959. memset(orig_pixels, 0, sizeof(orig_pixels));
  2960. memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  2961. memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
  2962. for (int i = 0; i < 1280 + 8; ++i) {
  2963. orig_pixels[i] = i * 256;
  2964. }
  2965. GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 1280);
  2966. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  2967. #if !defined(LIBYUV_DISABLE_NEON) && \
  2968. (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
  2969. int has_neon = TestCpuFlag(kCpuHasNEON);
  2970. if (has_neon) {
  2971. GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280);
  2972. } else {
  2973. GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
  2974. }
  2975. #else
  2976. GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
  2977. #endif
  2978. }
  2979. for (int i = 0; i < 1280; ++i) {
  2980. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2981. }
  2982. EXPECT_EQ(dst_pixels_c[0],
  2983. static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
  2984. EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
  2985. }
  2986. extern "C" void GaussCol_NEON(const uint16_t* src0,
  2987. const uint16_t* src1,
  2988. const uint16_t* src2,
  2989. const uint16_t* src3,
  2990. const uint16_t* src4,
  2991. uint32_t* dst,
  2992. int width);
  2993. extern "C" void GaussCol_C(const uint16_t* src0,
  2994. const uint16_t* src1,
  2995. const uint16_t* src2,
  2996. const uint16_t* src3,
  2997. const uint16_t* src4,
  2998. uint32_t* dst,
  2999. int width);
  3000. TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
  3001. SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]);
  3002. SIMD_ALIGNED(uint32_t dst_pixels_c[1280]);
  3003. SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]);
  3004. memset(orig_pixels, 0, sizeof(orig_pixels));
  3005. memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  3006. memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
  3007. for (int i = 0; i < 1280 * 5; ++i) {
  3008. orig_pixels[i] = static_cast<float>(i);
  3009. }
  3010. GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
  3011. &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0],
  3012. 1280);
  3013. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  3014. #if !defined(LIBYUV_DISABLE_NEON) && \
  3015. (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
  3016. int has_neon = TestCpuFlag(kCpuHasNEON);
  3017. if (has_neon) {
  3018. GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
  3019. &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
  3020. &dst_pixels_opt[0], 1280);
  3021. } else {
  3022. GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
  3023. &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
  3024. &dst_pixels_opt[0], 1280);
  3025. }
  3026. #else
  3027. GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
  3028. &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
  3029. &dst_pixels_opt[0], 1280);
  3030. #endif
  3031. }
  3032. for (int i = 0; i < 1280; ++i) {
  3033. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  3034. }
  3035. }
  3036. TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) {
  3037. SIMD_ALIGNED(float orig_pixels[1280 + 4]);
  3038. SIMD_ALIGNED(float dst_pixels_c[1280]);
  3039. SIMD_ALIGNED(float dst_pixels_opt[1280]);
  3040. memset(orig_pixels, 0, sizeof(orig_pixels));
  3041. memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  3042. memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
  3043. for (int i = 0; i < 1280 + 4; ++i) {
  3044. orig_pixels[i] = static_cast<float>(i);
  3045. }
  3046. GaussRow_F32_C(&orig_pixels[0], &dst_pixels_c[0], 1280);
  3047. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  3048. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
  3049. int has_neon = TestCpuFlag(kCpuHasNEON);
  3050. if (has_neon) {
  3051. GaussRow_F32_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280);
  3052. } else {
  3053. GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
  3054. }
  3055. #else
  3056. GaussRow_F32_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
  3057. #endif
  3058. }
  3059. for (int i = 0; i < 1280; ++i) {
  3060. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  3061. }
  3062. }
  3063. TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) {
  3064. SIMD_ALIGNED(float dst_pixels_c[1280]);
  3065. SIMD_ALIGNED(float dst_pixels_opt[1280]);
  3066. align_buffer_page_end(orig_pixels_buf, 1280 * 5 * 4); // 5 rows
  3067. float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf);
  3068. memset(orig_pixels, 0, 1280 * 5 * 4);
  3069. memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  3070. memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
  3071. for (int i = 0; i < 1280 * 5; ++i) {
  3072. orig_pixels[i] = static_cast<float>(i);
  3073. }
  3074. GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
  3075. &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
  3076. &dst_pixels_c[0], 1280);
  3077. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  3078. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
  3079. int has_neon = TestCpuFlag(kCpuHasNEON);
  3080. if (has_neon) {
  3081. GaussCol_F32_NEON(&orig_pixels[0], &orig_pixels[1280],
  3082. &orig_pixels[1280 * 2], &orig_pixels[1280 * 3],
  3083. &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280);
  3084. } else {
  3085. GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280],
  3086. &orig_pixels[1280 * 2], &orig_pixels[1280 * 3],
  3087. &orig_pixels[1280 * 4], &dst_pixels_opt[0], 1280);
  3088. }
  3089. #else
  3090. GaussCol_F32_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
  3091. &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
  3092. &dst_pixels_opt[0], 1280);
  3093. #endif
  3094. }
  3095. for (int i = 0; i < 1280; ++i) {
  3096. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  3097. }
  3098. free_aligned_buffer_page_end(orig_pixels_buf);
  3099. }
  3100. TEST_F(LibYUVPlanarTest, SwapUVRow) {
  3101. const int kPixels = benchmark_width_ * benchmark_height_;
  3102. void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
  3103. SwapUVRow_C;
  3104. align_buffer_page_end(src_pixels_vu, kPixels * 2);
  3105. align_buffer_page_end(dst_pixels_uv, kPixels * 2);
  3106. MemRandomize(src_pixels_vu, kPixels * 2);
  3107. memset(dst_pixels_uv, 1, kPixels * 2);
  3108. #if defined(HAS_SWAPUVROW_NEON)
  3109. if (TestCpuFlag(kCpuHasNEON)) {
  3110. SwapUVRow = SwapUVRow_Any_NEON;
  3111. if (IS_ALIGNED(kPixels, 16)) {
  3112. SwapUVRow = SwapUVRow_NEON;
  3113. }
  3114. }
  3115. #endif
  3116. for (int j = 0; j < benchmark_iterations_; j++) {
  3117. SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
  3118. }
  3119. for (int i = 0; i < kPixels; ++i) {
  3120. EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
  3121. EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
  3122. }
  3123. free_aligned_buffer_page_end(src_pixels_vu);
  3124. free_aligned_buffer_page_end(dst_pixels_uv);
  3125. }
  3126. #endif // ENABLE_ROW_TESTS
  3127. TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
  3128. const int kSize = benchmark_width_ * benchmark_height_ * 4;
  3129. align_buffer_page_end(orig_pixels, kSize);
  3130. align_buffer_page_end(dst_pixels_opt, kSize);
  3131. align_buffer_page_end(dst_pixels_c, kSize);
  3132. for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
  3133. ((float*)(orig_pixels))[i] = (i & 1023) * 3.14f;
  3134. }
  3135. memset(dst_pixels_opt, 1, kSize);
  3136. memset(dst_pixels_c, 2, kSize);
  3137. MaskCpuFlags(disable_cpu_flags_);
  3138. GaussPlane_F32((const float*)(orig_pixels), benchmark_width_,
  3139. (float*)(dst_pixels_c), benchmark_width_, benchmark_width_,
  3140. benchmark_height_);
  3141. MaskCpuFlags(benchmark_cpu_info_);
  3142. for (int i = 0; i < benchmark_iterations_; ++i) {
  3143. GaussPlane_F32((const float*)(orig_pixels), benchmark_width_,
  3144. (float*)(dst_pixels_opt), benchmark_width_, benchmark_width_,
  3145. benchmark_height_);
  3146. }
  3147. for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
  3148. EXPECT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f)
  3149. << i;
  3150. }
  3151. free_aligned_buffer_page_end(dst_pixels_c);
  3152. free_aligned_buffer_page_end(dst_pixels_opt);
  3153. free_aligned_buffer_page_end(orig_pixels);
  3154. }
  3155. TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
  3156. int dst_width = (benchmark_width_ + 1) / 2;
  3157. int dst_height = (benchmark_height_ + 1) / 2;
  3158. align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_);
  3159. align_buffer_page_end(src_pixels_v, benchmark_width_ * benchmark_height_);
  3160. align_buffer_page_end(tmp_pixels_u, dst_width * dst_height);
  3161. align_buffer_page_end(tmp_pixels_v, dst_width * dst_height);
  3162. align_buffer_page_end(dst_pixels_uv_opt, dst_width * 2 * dst_height);
  3163. align_buffer_page_end(dst_pixels_uv_c, dst_width * 2 * dst_height);
  3164. MemRandomize(src_pixels_u, benchmark_width_ * benchmark_height_);
  3165. MemRandomize(src_pixels_v, benchmark_width_ * benchmark_height_);
  3166. MemRandomize(tmp_pixels_u, dst_width * dst_height);
  3167. MemRandomize(tmp_pixels_v, dst_width * dst_height);
  3168. MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height);
  3169. MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height);
  3170. MaskCpuFlags(disable_cpu_flags_);
  3171. HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
  3172. benchmark_width_, dst_pixels_uv_c, dst_width * 2,
  3173. benchmark_width_, benchmark_height_);
  3174. MaskCpuFlags(benchmark_cpu_info_);
  3175. for (int i = 0; i < benchmark_iterations_; ++i) {
  3176. HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
  3177. benchmark_width_, dst_pixels_uv_opt, dst_width * 2,
  3178. benchmark_width_, benchmark_height_);
  3179. }
  3180. for (int i = 0; i < dst_width * 2 * dst_height; ++i) {
  3181. EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
  3182. }
  3183. free_aligned_buffer_page_end(src_pixels_u);
  3184. free_aligned_buffer_page_end(src_pixels_v);
  3185. free_aligned_buffer_page_end(tmp_pixels_u);
  3186. free_aligned_buffer_page_end(tmp_pixels_v);
  3187. free_aligned_buffer_page_end(dst_pixels_uv_opt);
  3188. free_aligned_buffer_page_end(dst_pixels_uv_c);
  3189. }
  3190. } // namespace libyuv