row_common.cc 126 KB

/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include <stdio.h>
#include <string.h>  // For memcpy and memset.

#include "libyuv/basic_types.h"
#include "libyuv/convert_argb.h"  // For kYuvI601Constants

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// The following ifdef from row_win makes the C code match the row_win code,
// which is 7 bit fixed point.
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
    (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
#define LIBYUV_RGB7 1
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86)
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#endif

// llvm x86 is poor at ternary operator, so use branchless min/max.
#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return -(v >= 0) & v;
}
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}
static __inline int32_t clamp1023(int32_t v) {
  return (-(v >= 1023) | v) & 1023;
}
static __inline uint32_t Abs(int32_t v) {
  int m = -(v < 0);
  return (v + m) ^ m;
}
#else   // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return (v < 0) ? 0 : v;
}
static __inline int32_t clamp255(int32_t v) {
  return (v > 255) ? 255 : v;
}
static __inline int32_t clamp1023(int32_t v) {
  return (v > 1023) ? 1023 : v;
}
static __inline uint32_t Abs(int32_t v) {
  return (v < 0) ? -v : v;
}
#endif  // USE_BRANCHLESS
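
// Example: clamp255(300): -(300 >= 255) is all ones, so the OR saturates and
// the final & 255 yields 255; clamp255(100): the mask is 0, so 100 passes
// through. Likewise clamp0(-5): -(-5 >= 0) is 0, masking the result to 0.
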
static __inline uint32_t Clamp(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp255(v));
}
static __inline uint32_t Clamp10(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp1023(v));
}

// Little Endian
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WRITEWORD(p, v) *(uint32_t*)(p) = v
#else
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
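
// Example: on a little-endian target WRITEWORD(p, 0x04030201) stores bytes
// 0x01 0x02 0x03 0x04 at p[0..3]; the big-endian fallback above writes the
// same byte order explicitly, so packed output is identical on both.
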
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb24[0];
    uint8_t g = src_rgb24[1];
    uint8_t r = src_rgb24[2];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_rgb24 += 3;
  }
}

void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_raw += 3;
  }
}

void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_rgba[0] = 255u;
    dst_rgba[1] = b;
    dst_rgba[2] = g;
    dst_rgba[3] = r;
    dst_rgba += 4;
    src_raw += 3;
  }
}

void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_rgb24[0] = b;
    dst_rgb24[1] = g;
    dst_rgb24[2] = r;
    dst_rgb24 += 3;
    src_raw += 3;
  }
}

void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb565[0] & 0x1f;
    uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r = src_rgb565[1] >> 3;
    dst_argb[0] = (b << 3) | (b >> 2);
    dst_argb[1] = (g << 2) | (g >> 4);
    dst_argb[2] = (r << 3) | (r >> 2);
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_rgb565 += 2;
  }
}
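
// Note: (b << 3) | (b >> 2) replicates a 5-bit value into 8 bits so the full
// 0..255 range is reached, e.g. 0x1f -> 0xf8 | 0x07 = 0xff and 0x00 -> 0x00;
// the 6-bit green channel uses (g << 2) | (g >> 4) for the same reason.
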
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb1555[0] & 0x1f;
    uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t a = src_argb1555[1] >> 7;
    dst_argb[0] = (b << 3) | (b >> 2);
    dst_argb[1] = (g << 3) | (g >> 2);
    dst_argb[2] = (r << 3) | (r >> 2);
    dst_argb[3] = -a;
    dst_argb += 4;
    src_argb1555 += 2;
  }
}

void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb4444[0] & 0x0f;
    uint8_t g = src_argb4444[0] >> 4;
    uint8_t r = src_argb4444[1] & 0x0f;
    uint8_t a = src_argb4444[1] >> 4;
    dst_argb[0] = (b << 4) | b;
    dst_argb[1] = (g << 4) | g;
    dst_argb[2] = (r << 4) | r;
    dst_argb[3] = (a << 4) | a;
    dst_argb += 4;
    src_argb4444 += 2;
  }
}

void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    *(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24);
    dst_argb += 4;
    src_ar30 += 4;
  }
}

void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    *(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}

void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;
    uint32_t r = (ar30 >> 20) & 0x3ff;
    *(uint32_t*)(dst_ab30) = r | ga | (b << 20);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}
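
// AR30 is a little-endian 32-bit word with 10-bit channels: B in bits 0..9,
// G in bits 10..19, R in bits 20..29 and 2-bit alpha in bits 30..31. The
// conversions above keep the top 8 bits of each 10-bit channel
// ((ar30 >> 2) & 0xff) and replicate the 2-bit alpha with * 0x55
// (e.g. 3 * 0x55 = 0xff).
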
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = b;
    dst_rgb[1] = g;
    dst_rgb[2] = r;
    dst_rgb += 3;
    src_argb += 4;
  }
}

void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = r;
    dst_rgb[1] = g;
    dst_rgb[2] = b;
    dst_rgb += 3;
    src_argb += 4;
  }
}

void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t b1 = src_argb[4] >> 3;
    uint8_t g1 = src_argb[5] >> 2;
    uint8_t r1 = src_argb[6] >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
  }
}
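
// Illustrative usage (a sketch, not part of the original file): row functions
// convert one scanline at a time, so callers loop over rows applying each
// plane's stride in bytes. The wrapper name here is hypothetical.
//
//   void ArgbImageToRgb565(const uint8_t* src_argb, int src_stride_argb,
//                          uint8_t* dst_rgb565, int dst_stride_rgb565,
//                          int width, int height) {
//     int y;
//     for (y = 0; y < height; ++y) {
//       ARGBToRGB565Row_C(src_argb, dst_rgb565, width);
//       src_argb += src_stride_argb;
//       dst_rgb565 += dst_stride_rgb565;
//     }
//   }
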
// dither4 is a row of 4 values from a 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB. When converting to
// fewer bits (565) this provides an ordered dither.
// The first byte of the 4x4 matrix is the upper-left value.
// The 4 values are passed as an int, then referenced as an array, so
// endianness will not affect the order of the original matrix. But dither4
// will contain the first pixel in the lower byte for little endian
// or in the upper byte for big endian.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             const uint32_t dither4,
                             int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    int dither0 = ((const unsigned char*)(&dither4))[x & 3];
    int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
    uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
    uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
    uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
  }
}
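
// Example: with dither4 = 0x03020100 on a little-endian target the column
// offsets are 0, 1, 2, 3; a pixel value of 254 in column 3 becomes
// clamp255(254 + 3) = 255 before the >> 3 truncation to 5 bits.
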
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 3;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t a0 = src_argb[3] >> 7;
    uint8_t b1 = src_argb[4] >> 3;
    uint8_t g1 = src_argb[5] >> 3;
    uint8_t r1 = src_argb[6] >> 3;
    uint8_t a1 = src_argb[7] >> 7;
    *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
                            (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 3;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t a0 = src_argb[3] >> 7;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
  }
}

void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 4;
    uint8_t g0 = src_argb[1] >> 4;
    uint8_t r0 = src_argb[2] >> 4;
    uint8_t a0 = src_argb[3] >> 4;
    uint8_t b1 = src_argb[4] >> 4;
    uint8_t g1 = src_argb[5] >> 4;
    uint8_t r1 = src_argb[6] >> 4;
    uint8_t a1 = src_argb[7] >> 4;
    *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
                            (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 4;
    uint8_t g0 = src_argb[1] >> 4;
    uint8_t r0 = src_argb[2] >> 4;
    uint8_t a0 = src_argb[3] >> 4;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
  }
}

void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}

void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
}
#else
// 8 bit
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
//         0x7e80) >> 8;
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}
#endif
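
// Example: RGBToY maps full-range RGB into the BT.601 studio range 16..235:
// RGBToY(0, 0, 0) = 0x1080 >> 8 = 16 and
// RGBToY(255, 255, 255) = ((66 + 129 + 25) * 255 + 0x1080) >> 8 = 235.
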
#define AVGB(a, b) (((a) + (b) + 1) >> 1)

#ifdef LIBYUV_RGBTOU_TRUNCATE
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
}
#else
// TODO(fbarchard): Add rounding to SIMD and use this
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
}
#endif

#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
  return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
  return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
}
#endif
// ARGBToY_C and ARGBToUV_C
// Intel version mimics SSE/AVX, which does 2 pavgb instructions.
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWY(NAME, R, G, B, BPP) \
  void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
      src_argb0 += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
                        AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
      uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
                        AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
      uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
                        AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
      dst_u[0] = RGBToU(ar, ag, ab); \
      dst_v[0] = RGBToV(ar, ag, ab); \
      src_rgb0 += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
      uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
      uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
      dst_u[0] = RGBToU(ar, ag, ab); \
      dst_v[0] = RGBToV(ar, ag, ab); \
    } \
  }
#else
// ARM version does sum / 2 then multiplies by 2x-smaller coefficients.
#define MAKEROWY(NAME, R, G, B, BPP) \
  void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
      src_argb0 += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
                     src_rgb1[B + BPP] + 1) >> 1; \
      uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
                     src_rgb1[G + BPP] + 1) >> 1; \
      uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
                     src_rgb1[R + BPP] + 1) >> 1; \
      dst_u[0] = RGB2xToU(ar, ag, ab); \
      dst_v[0] = RGB2xToV(ar, ag, ab); \
      src_rgb0 += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint16_t ab = src_rgb0[B] + src_rgb1[B]; \
      uint16_t ag = src_rgb0[G] + src_rgb1[G]; \
      uint16_t ar = src_rgb0[R] + src_rgb1[R]; \
      dst_u[0] = RGB2xToU(ar, ag, ab); \
      dst_v[0] = RGB2xToV(ar, ag, ab); \
    } \
  }
#endif
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
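
// Illustrative usage (a sketch, not part of the original file): converting
// ARGB to I420 pairs two ToYRow_C calls with one ToUVRow_C call, since U and
// V are subsampled 2x vertically; the UV row reads two source rows via the
// stride parameter. The wrapper name is hypothetical and assumes even height.
//
//   void ArgbToI420Rows(const uint8_t* src_argb, int src_stride_argb,
//                       uint8_t* dst_y, int dst_stride_y,
//                       uint8_t* dst_u, uint8_t* dst_v, int dst_stride_uv,
//                       int width, int height) {
//     int y;
//     for (y = 0; y < height; y += 2) {
//       ARGBToYRow_C(src_argb, dst_y, width);
//       ARGBToYRow_C(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
//       ARGBToUVRow_C(src_argb, src_stride_argb, dst_u, dst_v, width);
//       src_argb += src_stride_argb * 2;
//       dst_y += dst_stride_y * 2;
//       dst_u += dst_stride_uv;
//       dst_v += dst_stride_uv;
//     }
//   }
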
// JPeg uses a variation on BT.601-1 full range
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPeg 7 bit Y (deprecated)
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPeg 8 bit Y:
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPeg 8 bit U:
// b  0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPeg 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r  0.50000 * 255 = 127.5 = 127
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
#else
// 8 bit
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#endif
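
// Example: the JPeg variant is full range:
// RGBToYJ(0, 0, 0) = 128 >> 8 = 0 and
// RGBToYJ(255, 255, 255) = ((77 + 150 + 29) * 255 + 128) >> 8 = 255.
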
#if defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif
// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimics SSE/AVX, which does 2 pavgb instructions.
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWYJ(NAME, R, G, B, BPP) \
  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
      src_argb0 += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
                        uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
                        AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
      uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
                        AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
      uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
                        AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
      dst_u[0] = RGBToUJ(ar, ag, ab); \
      dst_v[0] = RGBToVJ(ar, ag, ab); \
      src_rgb0 += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
      uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
      uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
      dst_u[0] = RGBToUJ(ar, ag, ab); \
      dst_v[0] = RGBToVJ(ar, ag, ab); \
    } \
  }
#else
// ARM version does sum / 2 then multiplies by 2x-smaller coefficients.
#define MAKEROWYJ(NAME, R, G, B, BPP) \
  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
      src_argb0 += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
                        uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
                     src_rgb1[B + BPP] + 1) >> 1; \
      uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
                     src_rgb1[G + BPP] + 1) >> 1; \
      uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
                     src_rgb1[R + BPP] + 1) >> 1; \
      dst_u[0] = RGB2xToUJ(ar, ag, ab); \
      dst_v[0] = RGB2xToVJ(ar, ag, ab); \
      src_rgb0 += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint16_t ab = (src_rgb0[B] + src_rgb1[B]); \
      uint16_t ag = (src_rgb0[G] + src_rgb1[G]); \
      uint16_t ar = (src_rgb0[R] + src_rgb1[R]); \
      dst_u[0] = RGB2xToUJ(ar, ag, ab); \
      dst_v[0] = RGB2xToVJ(ar, ag, ab); \
    } \
  }
#endif
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb565[0] & 0x1f;
    uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r = src_rgb565[1] >> 3;
    b = (b << 3) | (b >> 2);
    g = (g << 2) | (g >> 4);
    r = (r << 3) | (r >> 2);
    dst_y[0] = RGBToY(r, g, b);
    src_rgb565 += 2;
    dst_y += 1;
  }
}

void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb1555[0] & 0x1f;
    uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
    b = (b << 3) | (b >> 2);
    g = (g << 3) | (g >> 2);
    r = (r << 3) | (r >> 2);
    dst_y[0] = RGBToY(r, g, b);
    src_argb1555 += 2;
    dst_y += 1;
  }
}

void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb4444[0] & 0x0f;
    uint8_t g = src_argb4444[0] >> 4;
    uint8_t r = src_argb4444[1] & 0x0f;
    b = (b << 4) | b;
    g = (g << 4) | g;
    r = (r << 4) | r;
    dst_y[0] = RGBToY(r, g, b);
    src_argb4444 += 2;
    dst_y += 1;
  }
}
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b1 = src_rgb565[2] & 0x1f;
    uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
    uint8_t r1 = src_rgb565[3] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;
    uint8_t b3 = next_rgb565[2] & 0x1f;
    uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
    uint8_t r3 = next_rgb565[3] >> 3;
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 2) | (g1 >> 4);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 2) | (g3 >> 4);
    r3 = (r3 << 3) | (r3 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b1 = src_argb1555[2] & 0x1f;
    uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b3 = next_argb1555[2] & 0x1f;
    uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 3) | (g1 >> 2);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 3) | (g3 >> 2);
    r3 = (r3 << 3) | (r3 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    // Mask out the alpha bit before extracting red, as in the loop above.
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b1 = src_argb4444[2] & 0x0f;
    uint8_t g1 = src_argb4444[2] >> 4;
    uint8_t r1 = src_argb4444[3] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b3 = next_argb4444[2] & 0x0f;
    uint8_t g3 = next_argb4444[2] >> 4;
    uint8_t r3 = next_argb4444[3] & 0x0f;
    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b1 = (b1 << 4) | b1;
    g1 = (g1 << 4) | g1;
    r1 = (r1 << 4) | r1;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;
    b3 = (b3 << 4) | b3;
    g3 = (g3 << 4) | g3;
    r3 = (r3 << 4) | r3;
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
    src_argb4444 += 4;
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t ab = src_argb[0];
    uint8_t ag = src_argb[1];
    uint8_t ar = src_argb[2];
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
    src_argb += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    dst_argb[3] = src_argb[3];
    dst_argb += 4;
    src_argb += 4;
  }
}
// Convert a row of image to Sepia tone.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    int sb = (b * 17 + g * 68 + r * 35) >> 7;
    int sg = (b * 22 + g * 88 + r * 45) >> 7;
    int sr = (b * 24 + g * 98 + r * 50) >> 7;
    // b does not overflow. a is preserved from the original.
    dst_argb[0] = sb;
    dst_argb[1] = clamp255(sg);
    dst_argb[2] = clamp255(sr);
    dst_argb += 4;
  }
}
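
// The sepia blue coefficients sum to 120, so sb peaks at (120 * 255) >> 7 =
// 239 and cannot overflow; the green and red coefficient sums (155 and 172)
// can exceed 255, hence the clamp255 on sg and sr above.
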
// Apply color matrix to a row of image. Matrix is signed.
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          const int8_t* matrix_argb,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = src_argb[0];
    int g = src_argb[1];
    int r = src_argb[2];
    int a = src_argb[3];
    int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
              a * matrix_argb[3]) >> 6;
    int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
              a * matrix_argb[7]) >> 6;
    int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
              a * matrix_argb[11]) >> 6;
    int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
              a * matrix_argb[15]) >> 6;
    dst_argb[0] = Clamp(sb);
    dst_argb[1] = Clamp(sg);
    dst_argb[2] = Clamp(sr);
    dst_argb[3] = Clamp(sa);
    src_argb += 4;
    dst_argb += 4;
  }
}
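
// The matrix entries are signed 2.6 fixed point (each sum is shifted right by
// 6), so 64 represents 1.0. For example, this hypothetical matrix is the
// identity transform:
//
//   static const int8_t kIdentityMatrix[16] = {64, 0, 0, 0, 0, 64, 0, 0,
//                                              0, 0, 64, 0, 0, 0, 0, 64};
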
// Apply color table to a row of image.
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    int a = dst_argb[3];
    dst_argb[0] = table_argb[b * 4 + 0];
    dst_argb[1] = table_argb[g * 4 + 1];
    dst_argb[2] = table_argb[r * 4 + 2];
    dst_argb[3] = table_argb[a * 4 + 3];
    dst_argb += 4;
  }
}

// Apply color table to a row of image.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    dst_argb[0] = table_argb[b * 4 + 0];
    dst_argb[1] = table_argb[g * 4 + 1];
    dst_argb[2] = table_argb[r * 4 + 2];
    dst_argb += 4;
  }
}
void ARGBQuantizeRow_C(uint8_t* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
    dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
    dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
    dst_argb += 4;
  }
}
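
// scale is 16.16 fixed point. As a worked example (the scale choice here is
// hypothetical), scale = 65536 / interval_size makes (v * scale >> 16) equal
// v / interval_size, so interval_size = 64 with interval_offset = 32
// posterizes each channel to the four levels 32, 96, 160 and 224.
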
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v * f >> 24

void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
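
// REPEAT8 widens 8 bits to 16 (REPEAT8(0x80) = 0x8080), making the products
// above 16x16 -> 32 bit, and >> 24 returns an 8-bit result. For example,
// shading white (0xffff) by 0x80 (0x8080) gives (0xffff * 0x8080) >> 24 = 128,
// i.e. a half-intensity multiply.
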
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v * f >> 16

void ARGBMultiplyRow_C(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb0[0]);
    const uint32_t g = REPEAT8(src_argb0[1]);
    const uint32_t r = REPEAT8(src_argb0[2]);
    const uint32_t a = REPEAT8(src_argb0[3]);
    const uint32_t b_scale = src_argb1[0];
    const uint32_t g_scale = src_argb1[1];
    const uint32_t r_scale = src_argb1[2];
    const uint32_t a_scale = src_argb1[3];
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
#define SHADE(f, v) clamp255(v + f)

void ARGBAddRow_C(const uint8_t* src_argb0,
                  const uint8_t* src_argb1,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_add = src_argb1[0];
    const int g_add = src_argb1[1];
    const int r_add = src_argb1[2];
    const int a_add = src_argb1[3];
    dst_argb[0] = SHADE(b, b_add);
    dst_argb[1] = SHADE(g, g_add);
    dst_argb[2] = SHADE(r, r_add);
    dst_argb[3] = SHADE(a, a_add);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE
#define SHADE(f, v) clamp0(f - v)

void ARGBSubtractRow_C(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_sub = src_argb1[0];
    const int g_sub = src_argb1[1];
    const int r_sub = src_argb1[2];
    const int a_sub = src_argb1[3];
    dst_argb[0] = SHADE(b, b_sub);
    dst_argb[1] = SHADE(g, g_sub);
    dst_argb[2] = SHADE(r, r_sub);
    dst_argb[3] = SHADE(a, a_sub);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE
// Sobel functions which mimic SSSE3.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int a = src_y0[i];
    int b = src_y1[i];
    int c = src_y2[i];
    int a_sub = src_y0[i + 2];
    int b_sub = src_y1[i + 2];
    int c_sub = src_y2[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    dst_sobelx[i] = (uint8_t)(clamp255(sobel));
  }
}

void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int a = src_y0[i + 0];
    int b = src_y0[i + 1];
    int c = src_y0[i + 2];
    int a_sub = src_y1[i + 0];
    int b_sub = src_y1[i + 1];
    int c_sub = src_y1[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    dst_sobely[i] = (uint8_t)(clamp255(sobel));
  }
}
  1184. void SobelRow_C(const uint8_t* src_sobelx,
  1185. const uint8_t* src_sobely,
  1186. uint8_t* dst_argb,
  1187. int width) {
  1188. int i;
  1189. for (i = 0; i < width; ++i) {
  1190. int r = src_sobelx[i];
  1191. int b = src_sobely[i];
  1192. int s = clamp255(r + b);
  1193. dst_argb[0] = (uint8_t)(s);
  1194. dst_argb[1] = (uint8_t)(s);
  1195. dst_argb[2] = (uint8_t)(s);
  1196. dst_argb[3] = (uint8_t)(255u);
  1197. dst_argb += 4;
  1198. }
  1199. }
  1200. void SobelToPlaneRow_C(const uint8_t* src_sobelx,
  1201. const uint8_t* src_sobely,
  1202. uint8_t* dst_y,
  1203. int width) {
  1204. int i;
  1205. for (i = 0; i < width; ++i) {
  1206. int r = src_sobelx[i];
  1207. int b = src_sobely[i];
  1208. int s = clamp255(r + b);
  1209. dst_y[i] = (uint8_t)(s);
  1210. }
  1211. }
  1212. void SobelXYRow_C(const uint8_t* src_sobelx,
  1213. const uint8_t* src_sobely,
  1214. uint8_t* dst_argb,
  1215. int width) {
  1216. int i;
  1217. for (i = 0; i < width; ++i) {
  1218. int r = src_sobelx[i];
  1219. int b = src_sobely[i];
  1220. int g = clamp255(r + b);
  1221. dst_argb[0] = (uint8_t)(b);
  1222. dst_argb[1] = (uint8_t)(g);
  1223. dst_argb[2] = (uint8_t)(r);
  1224. dst_argb[3] = (uint8_t)(255u);
  1225. dst_argb += 4;
  1226. }
  1227. }
  1228. void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  1229. // Copy a Y to RGB.
  1230. int x;
  1231. for (x = 0; x < width; ++x) {
  1232. uint8_t y = src_y[0];
  1233. dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
  1234. dst_argb[3] = 255u;
  1235. dst_argb += 4;
  1236. ++src_y;
  1237. }
  1238. }
// TODO(fbarchard): Unify these structures to be platform independent.
// TODO(fbarchard): Generate SIMD structures from float matrix.

// BT.601 YUV to RGB reference
//  R = (Y - 16) * 1.164              - V * -1.596
//  G = (Y - 16) * 1.164 - U *  0.391 - V *  0.813
//  B = (Y - 16) * 1.164 - U * -2.018

// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.018 * 64)) */
#define UG 25   /* round(0.391 * 64) */
#define VG 52   /* round(0.813 * 64) */
#define VR -102 /* round(-1.596 * 64) */

// Bias values to subtract 16 from Y and 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

#if defined(__aarch64__)  // 64 bit arm
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)  // 32 bit arm
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB, YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB, YGB}};
#endif

#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
// JPEG YUV to RGB reference
//  R = Y                - V * -1.40200
//  G = Y - U *  0.34414 - V *  0.71414
//  B = Y - U * -1.77200

// Y contribution to R,G,B.  Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32   /* 64 / 2 */

// U and V contributions to R,G,B.
#define UB -113 /* round(-1.77200 * 64) */
#define UG 22   /* round(0.34414 * 64) */
#define VG 46   /* round(0.71414 * 64) */
#define VR -90  /* round(-1.40200 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB, YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB, YGB}};
#endif

#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
// BT.709 YUV to RGB reference
//  R = (Y - 16) * 1.164              - V * -1.793
//  G = (Y - 16) * 1.164 - U *  0.213 - V *  0.533
//  B = (Y - 16) * 1.164 - U * -2.112
// See also http://www.equasys.de/colorconversion.html

// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.112 * 64)) */
#define UG 14   /* round(0.213 * 64) */
#define VG 34   /* round(0.533 * 64) */
#define VR -115 /* round(-1.793 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB, YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB, YGB}};
#endif

#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
// BT.2020 YUV to RGB reference
//  R = (Y - 16) * 1.164384                 - V * -1.67867
//  G = (Y - 16) * 1.164384 - U *  0.187326 - V *  0.65042
//  B = (Y - 16) * 1.164384 - U * -2.14177

// Y contribution to R,G,B.  Scale and bias.
#define YG 19003  /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */

// TODO(fbarchard): Improve accuracy; the B channel is off by 7%.
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.142 * 64)) */
#define UG 12   /* round(0.187326 * 64) */
#define VG 42   /* round(0.65042 * 64) */
#define VR -107 /* round(-1.67867 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB, YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB, YGB}};
#endif

#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and writes 8 bit clamped RGB.
static __inline void YuvPixel(uint8_t y,
                              uint8_t u,
                              uint8_t v,
                              uint8_t* b,
                              uint8_t* g,
                              uint8_t* r,
                              const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
  *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6);
  *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6);
  *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6);
}
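// For example, with kYuvI601Constants and a neutral grey pixel
// (y = 128, u = v = 128) on the non-arm path:
//   y1 = (128 * 0x0101 * 18997) >> 16 = 9535
//   *b = Clamp((16384 + 9535 - 17544) >> 6) = Clamp(8375 >> 6) = 130
// and g and r work out to 130 as well, matching the reference
// (128 - 16) * 1.164 = 130.4.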
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel8_16(uint8_t y,
                                  uint8_t u,
                                  uint8_t v,
                                  int* b,
                                  int* g,
                                  int* r,
                                  const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
  *b = (int)(-(u * ub) + y1 + bb);
  *g = (int)(-(u * ug + v * vg) + y1 + bg);
  *r = (int)(-(v * vr) + y1 + br);
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel16(int16_t y,
                                int16_t u,
                                int16_t v,
                                int* b,
                                int* g,
                                int* r,
                                const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
  u = clamp255(u >> 2);
  v = clamp255(v >> 2);
  *b = (int)(-(u * ub) + y1 + bb);
  *g = (int)(-(u * ug + v * vg) + y1 + bg);
  *r = (int)(-(v * vr) + y1 + br);
}
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int b16;
  int g16;
  int r16;
  YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants);
  *b = Clamp(b16 >> 6);
  *g = Clamp(g16 >> 6);
  *r = Clamp(r16 >> 6);
}
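// For example, 10 bit grey (y = 512, u = v = 512) with kYuvI601Constants:
// YuvPixel16 computes y1 = ((512 << 6) * 18997) >> 16 = 9498 and
// b16 = g16 = r16 = 8338, so the result is Clamp(8338 >> 6) = 130,
// consistent with the 8 bit path for y = 128.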
// C reference code that mimics the YUV assembly.
// Reads 8 bit Y and writes 8 bit grey RGB.
static __inline void YPixel(uint8_t y,
                            uint8_t* b,
                            uint8_t* g,
                            uint8_t* r,
                            const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__) || defined(__arm__)
  int ygb = yuvconstants->kUVBiasBGR[3];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ygb = yuvconstants->kYBiasToRgb[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
  *b = Clamp(((int32_t)(y1) + ygb) >> 6);
  *g = Clamp(((int32_t)(y1) + ygb) >> 6);
  *r = Clamp(((int32_t)(y1) + ygb) >> 6);
}
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C code that mimics the NEON assembly, which subsamples the UV planes.
// TODO(fbarchard): Remove subsampling from Neon.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
    uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
             yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
             yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 2;
    src_v += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
#else
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    src_y += 1;
    src_u += 1;
    src_v += 1;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
#endif
// Also used for 420
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

// 10 bit YUV to ARGB
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = b >> 4;  // convert 10.6 to 10 bit.
  g = g >> 4;
  r = r >> 4;
  b = Clamp10(b);
  g = Clamp10(g);
  r = Clamp10(r);
  ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
  (*(uint32_t*)rgb_buf) = ar30;
}
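// For example, a component value of 8338 from YuvPixel16 (about 130.3 with
// 6 fractional bits) becomes 8338 >> 4 = 521 after the shift, i.e. the same
// intensity rescaled to the 10 bit 0..1023 range; Clamp10 then bounds it and
// the 0xc0000000 term sets the 2 bit alpha field to 3 (opaque).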
// 10 bit YUV to 10 bit AR30
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
// 8 bit YUV to 10 bit AR30
// Uses the same code as 10 bit YUV, but shifts the 8 bit values up to 10 bits.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
void I422AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    src_u += 1;
    src_v += 1;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}

void I422ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}

void I422ToARGB4444Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
                                 (g1 << 20) | (r1 << 24) | 0xf000f000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
  }
}

void I422ToARGB1555Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) |
                                 (g1 << 21) | (r1 << 26) | 0x80008000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
  }
}
void I422ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    *(uint32_t*)(dst_rgb565) =
        b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}

void NV12ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void NV21ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_vu,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_uv += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}

void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_vu += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}

void NV12ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    *(uint32_t*)(dst_rgb565) =
        b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}

void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
             rgb_buf + 7, yuvconstants);
    rgb_buf[4] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;
  }
}

void I400ToARGBRow_C(const uint8_t* src_y,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  src += width - 1;
  for (x = 0; x < width - 1; x += 2) {
    dst[x] = src[0];
    dst[x + 1] = src[-1];
    src -= 2;
  }
  if (width & 1) {
    dst[width - 1] = src[0];
  }
}

void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
  int x;
  src_uv += (width - 1) << 1;
  for (x = 0; x < width; ++x) {
    dst_uv[0] = src_uv[0];
    dst_uv[1] = src_uv[1];
    src_uv -= 2;
    dst_uv += 2;
  }
}

void MirrorSplitUVRow_C(const uint8_t* src_uv,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int x;
  src_uv += (width - 1) << 1;
  for (x = 0; x < width - 1; x += 2) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[-2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[-2 + 1];
    src_uv -= 4;
  }
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}

void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  const uint32_t* src32 = (const uint32_t*)(src);
  uint32_t* dst32 = (uint32_t*)(dst);
  src32 += width - 1;
  for (x = 0; x < width - 1; x += 2) {
    dst32[x] = src32[0];
    dst32[x + 1] = src32[-1];
    src32 -= 2;
  }
  if (width & 1) {
    dst32[width - 1] = src32[0];
  }
}

void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
  int x;
  src_rgb24 += width * 3 - 3;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb24[0];
    uint8_t g = src_rgb24[1];
    uint8_t r = src_rgb24[2];
    dst_rgb24[0] = b;
    dst_rgb24[1] = g;
    dst_rgb24[2] = r;
    src_rgb24 -= 3;
    dst_rgb24 += 3;
  }
}

void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[3];
    src_uv += 4;
  }
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}

void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = src_u[x];
    dst_uv[1] = src_v[x];
    dst_uv[2] = src_u[x + 1];
    dst_uv[3] = src_v[x + 1];
    dst_uv += 4;
  }
  if (width & 1) {
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}

void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_r[x] = src_rgb[0];
    dst_g[x] = src_rgb[1];
    dst_b[x] = src_rgb[2];
    src_rgb += 3;
  }
}

void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_rgb[0] = src_r[x];
    dst_rgb[1] = src_g[x];
    dst_rgb[2] = src_b[x];
    dst_rgb += 3;
  }
}
// Use scale to convert lsb formats to msb, depending on how many bits there
// are:
// 128 = 9 bits
// 64 = 10 bits
// 16 = 12 bits
// 1 = 16 bits
void MergeUVRow_16_C(const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint16_t* dst_uv,
                     int scale,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = src_u[x] * scale;
    dst_uv[1] = src_v[x] * scale;
    dst_uv[2] = src_u[x + 1] * scale;
    dst_uv[3] = src_v[x + 1] * scale;
    dst_uv += 4;
  }
  if (width & 1) {
    dst_uv[0] = src_u[width - 1] * scale;
    dst_uv[1] = src_v[width - 1] * scale;
  }
}
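// For example, 10 bit input uses scale 64: a full-scale sample of 1023
// becomes 1023 * 64 = 0xffc0, msb-aligned in the 16 bit output.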
void MultiplyRow_16_C(const uint16_t* src_y,
                      uint16_t* dst_y,
                      int scale,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_y[x] * scale;
  }
}

// Use scale to convert lsb formats to msb, depending on how many bits there
// are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
void Convert16To8Row_C(const uint16_t* src_y,
                       uint8_t* dst_y,
                       int scale,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = clamp255((src_y[x] * scale) >> 16);
  }
}
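// For example, 10 bit input uses scale 16384: 1023 * 16384 >> 16 = 255 and
// 512 * 16384 >> 16 = 128.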
// Use scale to convert lsb formats to msb, depending on how many bits there
// are:
// 1024 = 10 bits
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  int x;
  scale *= 0x0101;  // replicates the byte.
  for (x = 0; x < width; ++x) {
    dst_y[x] = (src_y[x] * scale) >> 16;
  }
}
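// For example, scale 1024 converts 8 bit to 10 bit: the effective multiplier
// is 1024 * 0x0101 = 263168, so 255 * 263168 >> 16 = 1023.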
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  memcpy(dst, src, count);
}

void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  memcpy(dst, src, count * 2);
}

void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  memset(dst, v8, width);
}

void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    memcpy(dst_argb + x * sizeof v32, &v32, sizeof v32);
  }
}

// Filter 2 rows of YUY2 UV's (422) into U and V (420).
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  // Output a row of UV values, filtering 2 rows of YUY2.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
    src_yuy2 += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of YUY2 UV's (422) into U and V (422).
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = src_yuy2[1];
    dst_v[0] = src_yuy2[3];
    src_yuy2 += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of YUY2 Y's (422) into Y (420/422).
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_y[x] = src_yuy2[0];
    dst_y[x + 1] = src_yuy2[2];
    src_yuy2 += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[0];
  }
}

// Filter 2 rows of UYVY UV's (422) into U and V (420).
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
    src_uyvy += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of UYVY UV's (422) into U and V (422).
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = src_uyvy[0];
    dst_v[0] = src_uyvy[2];
    src_uyvy += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of UYVY Y's (422) into Y (420/422).
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_y[x] = src_uyvy[1];
    dst_y[x + 1] = src_uyvy[3];
    src_uyvy += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_uyvy[1];
  }
}
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code mimics the SSSE3 version for better testability.
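// For example, blending a half-opaque premultiplied foreground channel
// f = 100 (a = 128) over a background channel b = 200:
//   BLEND(100, 200, 128) = clamp255(((256 - 128) * 200 >> 8) + 100)
//                        = clamp255(100 + 100) = 200.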
void ARGBBlendRow_C(const uint8_t* src_argb0,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32_t fb = src_argb0[0];
    uint32_t fg = src_argb0[1];
    uint32_t fr = src_argb0[2];
    uint32_t a = src_argb0[3];
    uint32_t bb = src_argb1[0];
    uint32_t bg = src_argb1[1];
    uint32_t br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;
    fb = src_argb0[4 + 0];
    fg = src_argb0[4 + 1];
    fr = src_argb0[4 + 2];
    a = src_argb0[4 + 3];
    bb = src_argb1[4 + 0];
    bg = src_argb1[4 + 1];
    br = src_argb1[4 + 2];
    dst_argb[4 + 0] = BLEND(fb, bb, a);
    dst_argb[4 + 1] = BLEND(fg, bg, a);
    dst_argb[4 + 2] = BLEND(fr, br, a);
    dst_argb[4 + 3] = 255u;
    src_argb0 += 8;
    src_argb1 += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    uint32_t fb = src_argb0[0];
    uint32_t fg = src_argb0[1];
    uint32_t fr = src_argb0[2];
    uint32_t a = src_argb0[3];
    uint32_t bb = src_argb1[0];
    uint32_t bg = src_argb1[1];
    uint32_t br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;
  }
}
#undef BLEND
#define UBLEND(f, b, a) (((a) * f) + ((255 - a) * b) + 255) >> 8

void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
    dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  if (width & 1) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
  }
}
#undef UBLEND
#if defined(__aarch64__) || defined(__arm__)
#define ATTENUATE(f, a) (f * a + 128) >> 8
#else
// This code mimics the SSSE3 version for better testability.
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
#endif
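// For example, both variants attenuate f = 255 by a = 128 to 128:
//   arm:   (255 * 128 + 128) >> 8 = 128
//   SSSE3: 0x8080 * 0xffff >> 24 = 128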
// Multiply source RGB by alpha and store to destination.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    uint32_t b = src_argb[0];
    uint32_t g = src_argb[1];
    uint32_t r = src_argb[2];
    uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
    b = src_argb[4];
    g = src_argb[5];
    r = src_argb[6];
    a = src_argb[7];
    dst_argb[4] = ATTENUATE(b, a);
    dst_argb[5] = ATTENUATE(g, a);
    dst_argb[6] = ATTENUATE(r, a);
    dst_argb[7] = a;
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    const uint32_t b = src_argb[0];
    const uint32_t g = src_argb[1];
    const uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
  }
}
#undef ATTENUATE
// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T

void ARGBUnattenuateRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint32_t b = src_argb[0];
    uint32_t g = src_argb[1];
    uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
    b = (b * ia) >> 8;
    g = (g * ia) >> 8;
    r = (r * ia) >> 8;
    // Clamping should not be necessary but is free in assembly.
    dst_argb[0] = clamp255(b);
    dst_argb[1] = clamp255(g);
    dst_argb[2] = clamp255(r);
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}
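
// Illustrative sketch (not part of libyuv; compiled out): one unattenuate
// step using the 8.8 fixed-point reciprocal table above. For a = 64 the low
// half of the table entry is 0x10000 / 64 = 0x400, so (b * ia) >> 8
// approximates b * 255 / a, within the off-by-one noted above. Names and
// values are hypothetical.
#if 0
static void UnattenuateExample(void) {
  uint32_t b = 50;  // premultiplied blue
  uint32_t a = 64;  // alpha
  uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 0x10000 / 64 = 0x400
  uint32_t unatten = (b * ia) >> 8;         // 50 * 1024 >> 8 = 200
  uint32_t exact = (b * 255 + a / 2) / a;   // (12750 + 32) / 64 = 199
  (void)unatten;
  (void)exact;
}
#endif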

void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t row_sum[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    row_sum[0] += row[x * 4 + 0];
    row_sum[1] += row[x * 4 + 1];
    row_sum[2] += row[x * 4 + 2];
    row_sum[3] += row[x * 4 + 3];
    cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
    cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
    cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
    cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
  }
}

void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  float ooa = 1.0f / area;
  int i;
  for (i = 0; i < count; ++i) {
    dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
    dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
    dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
    dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
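
// Illustrative sketch (not part of libyuv; compiled out): the two functions
// above implement a summed-area table. ComputeCumulativeSumRow_C accumulates
// per-channel totals, and CumulativeSumToAverageRow_C reads any box sum back
// with four loads: sum = bl[w] + tl[0] - bl[0] - tl[w], independent of box
// size. Names here are hypothetical.
#if 0
static int32_t BoxSumChannel0(const int32_t* tl,  // cumsum row above the box
                              const int32_t* bl,  // cumsum bottom row of box
                              int w) {            // box width in int32 units
  // Channel 0 of one ARGB box; the cumsum rows interleave 4 channels.
  return bl[w + 0] + tl[0] - bl[0] - tl[w + 0];
}
#endif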

// Copy pixels from rotated source to destination row with a slope.
LIBYUV_API
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Render a row of pixels from source into a buffer.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    *(uint32_t*)(dst_argb) =
        *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}
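
// Illustrative sketch (not part of libyuv; compiled out): uv_dudv packs a
// starting source coordinate (u, v) and a per-destination-pixel step
// (du, dv). For example, holding u fixed and stepping v walks a source
// column into a destination row, i.e. a 90-degree rotation. Values here are
// hypothetical.
#if 0
static void AffineExample(const uint8_t* src_argb, int src_stride,
                          uint8_t* dst_argb, int width) {
  // Start at source pixel (10, 0); step down the source while stepping
  // right across the destination.
  const float uv_dudv[4] = {10.0f, 0.0f,   // u, v
                            0.0f,  1.0f};  // du, dv
  ARGBAffineRow_C(src_argb, src_stride, dst_argb, uv_dudv, width);
}
#endif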

// Blend 2 rows into 1.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

// C version 2x2 -> 2x1.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (y1_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (y1_fraction == 128) {
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
    dst_ptr[1] =
        (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
  }
}
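
// Illustrative sketch (not part of libyuv; compiled out): source_y_fraction
// is an 8.8-style weight in [0, 256): 0 copies the top row, 128 averages the
// two rows, and e.g. 64 yields 3/4 top + 1/4 bottom. Values here are
// hypothetical.
#if 0
static void InterpolateExample(void) {
  const uint8_t rows[2][4] = {{0, 0, 0, 0}, {255, 255, 255, 255}};
  uint8_t out[4];
  // 64/256 of the bottom row: out[i] == (0 * 192 + 255 * 64 + 128) >> 8 == 64.
  InterpolateRow_C(out, rows[0], 4 /* stride in bytes to rows[1] */, 4, 64);
}
#endif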

void InterpolateRow_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint16_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (source_y_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width * 2);
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
  }
}

// Use first 4 shuffler values to reorder ARGB channels.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  int index0 = shuffler[0];
  int index1 = shuffler[1];
  int index2 = shuffler[2];
  int index3 = shuffler[3];
  // Shuffle a row of ARGB.
  int x;
  for (x = 0; x < width; ++x) {
    // To support in-place conversion.
    uint8_t b = src_argb[index0];
    uint8_t g = src_argb[index1];
    uint8_t r = src_argb[index2];
    uint8_t a = src_argb[index3];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}
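
// Illustrative sketch (not part of libyuv; compiled out): a shuffler of
// {3, 2, 1, 0} reverses the bytes of each pixel, e.g. turning the B,G,R,A
// memory order of libyuv ARGB into the A,R,G,B order of BGRA. The table name
// here is hypothetical.
#if 0
static void ShuffleExample(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) {
  static const uint8_t kReverseBytes[4] = {3u, 2u, 1u, 0u};
  ARGBShuffleRow_C(src_argb, dst_argb, kReverseBytes, width);
}
#endif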

void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[1];
    dst_frame[3] = src_v[0];
    dst_frame += 4;
    src_y += 2;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;
    dst_frame[3] = src_v[0];
  }
}

void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;
  }
}

void ARGBPolynomialRow_C(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float b = (float)(src_argb[0]);
    float g = (float)(src_argb[1]);
    float r = (float)(src_argb[2]);
    float a = (float)(src_argb[3]);
    float b2 = b * b;
    float g2 = g * g;
    float r2 = r * r;
    float a2 = a * a;
    float db = poly[0] + poly[4] * b;
    float dg = poly[1] + poly[5] * g;
    float dr = poly[2] + poly[6] * r;
    float da = poly[3] + poly[7] * a;
    float b3 = b2 * b;
    float g3 = g2 * g;
    float r3 = r2 * r;
    float a3 = a2 * a;
    db += poly[8] * b2;
    dg += poly[9] * g2;
    dr += poly[10] * r2;
    da += poly[11] * a2;
    db += poly[12] * b3;
    dg += poly[13] * g3;
    dr += poly[14] * r3;
    da += poly[15] * a3;
    dst_argb[0] = Clamp((int32_t)(db));
    dst_argb[1] = Clamp((int32_t)(dg));
    dst_argb[2] = Clamp((int32_t)(dr));
    dst_argb[3] = Clamp((int32_t)(da));
    src_argb += 4;
    dst_argb += 4;
  }
}
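
// Illustrative sketch (not part of libyuv; compiled out): poly holds 16
// floats, four coefficients per channel in B, G, R, A order, so channel c is
// dst = poly[c] + poly[c+4]*v + poly[c+8]*v^2 + poly[c+12]*v^3. An identity
// table is therefore all zeros except 1.0 linear terms. Names here are
// hypothetical.
#if 0
static void PolynomialIdentityExample(const uint8_t* src_argb,
                                      uint8_t* dst_argb, int width) {
  static const float kIdentity[16] = {
      0.f, 0.f, 0.f, 0.f,   // constant terms
      1.f, 1.f, 1.f, 1.f,   // linear terms
      0.f, 0.f, 0.f, 0.f,   // squared terms
      0.f, 0.f, 0.f, 0.f};  // cubed terms
  ARGBPolynomialRow_C(src_argb, dst_argb, kIdentity, width);  // dst == src
}
#endif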

// Samples assumed to be unsigned in low 9, 10 or 12 bits. The scale factor
// adjusts the source integer range to the desired half-float range.
// The magic constant below is 2^-112. Multiplying by this is the same as
// subtracting 112 from the exponent, which is the difference in exponent bias
// between 32-bit and 16-bit floats. Once we've done this subtraction, we can
// simply extract the low bits of the exponent and the high bits of the
// mantissa from our float and we're done.

// Work around GCC 7 type-punning warning -Wstrict-aliasing.
#if defined(__GNUC__)
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
#else
typedef uint32_t uint32_alias_t;
#endif

void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  int i;
  float mult = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
    float value = src[i] * mult;
    dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13);
  }
}
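
// Illustrative sketch (not part of libyuv; compiled out): why multiplying by
// 2^-112 and shifting by 13 produces an IEEE half float. A float32 has 8
// exponent bits (bias 127) and 23 mantissa bits; a float16 has 5 exponent
// bits (bias 15) and 10 mantissa bits. Rebiasing by 127 - 15 = 112 and
// dropping 23 - 10 = 13 mantissa bits lines the fields up. The function name
// is hypothetical; memcpy is already available via this file's includes.
#if 0
static uint16_t FloatToHalfExample(float v) {
  float rebias = v * 1.9259299444e-34f;  // multiply by 2^-112
  uint32_t bits;
  memcpy(&bits, &rebias, sizeof(bits));  // well-defined type punning
  return (uint16_t)(bits >> 13);         // sign + exponent + top mantissa
}
#endif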

void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float value = src[i] * scale;
    dst[i] = value;
  }
}

void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  uint32_t bc = lumacoeff & 0xff;
  uint32_t gc = (lumacoeff >> 8) & 0xff;
  uint32_t rc = (lumacoeff >> 16) & 0xff;
  int i;
  for (i = 0; i < width - 1; i += 2) {
    // Luminance in rows, color values in columns.
    const uint8_t* luma0 =
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
        luma;
    const uint8_t* luma1;
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
    luma1 =
        ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) +
        luma;
    dst_argb[4] = luma1[src_argb[4]];
    dst_argb[5] = luma1[src_argb[5]];
    dst_argb[6] = luma1[src_argb[6]];
    dst_argb[7] = src_argb[7];
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    // Luminance in rows, color values in columns.
    const uint8_t* luma0 =
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
        luma;
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
}
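
// Illustrative sketch (not part of libyuv; compiled out): the luma table is
// indexed by ((b*bc + g*gc + r*rc) & 0x7F00), i.e. the high byte of the
// weighted sum selects one of 128 rows of 256 bytes, and the original channel
// value picks the column. A table whose every row is the identity ramp leaves
// pixels unchanged. The function name is hypothetical.
#if 0
static void BuildIdentityLumaTable(uint8_t* luma /* 128 * 256 bytes */) {
  int row, col;
  for (row = 0; row < 128; ++row) {
    for (col = 0; col < 256; ++col) {
      luma[row * 256 + col] = (uint8_t)col;  // identity within each luma band
    }
  }
}
#endif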

void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst[3] = src[3];
    dst[7] = src[7];
    dst += 8;
    src += 8;
  }
  if (width & 1) {
    dst[3] = src[3];
  }
}

void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst_a[0] = src_argb[3];
    dst_a[1] = src_argb[7];
    dst_a += 2;
    src_argb += 8;
  }
  if (width & 1) {
    dst_a[0] = src_argb[3];
  }
}

void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst[3] = src[0];
    dst[7] = src[1];
    dst += 8;
    src += 2;
  }
  if (width & 1) {
    dst[3] = src[0];
  }
}

// Maximum temporary width for wrappers to process at a time, in pixels.
#define MAXTWIDTH 2048

#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif
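
// Illustrative sketch (not part of libyuv; compiled out): every 2-step
// wrapper below follows the same pattern as the function above: convert up
// to MAXTWIDTH pixels into an aligned temporary ARGB row, repack that row to
// the final format, then advance the Y pointer by twidth, any half-width
// chroma pointers by twidth / 2, and the destination by twidth times its
// bytes per pixel. Names here are hypothetical.
#if 0
static void TwoStepWrapperPattern(const uint8_t* src, uint8_t* dst,
                                  int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);  // intermediate ARGB scratch
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    // Step 1: SomethingToARGBRow(src, row, twidth);
    // Step 2: ARGBToSomethingRow(row, dst, twidth);
    src += twidth * 4;  // advance by source bytes per pixel
    dst += twidth * 2;  // advance by destination bytes per pixel
    width -= twidth;
  }
  (void)row;
}
#endif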

#if defined(HAS_I422TOARGB1555ROW_SSSE3)
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_SSSE3)
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_SSSE3)
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_uv,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB24ROW_SSSE3)
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_uv += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV21TORGB24ROW_SSSE3)
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB24ROW_AVX2)
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV21TORGB24ROW_AVX2)
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TORGB565ROW_AVX2)
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_AVX2)
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_AVX2)
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TORGB24ROW_AVX2)
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#ifdef HAS_RGB24TOYJROW_AVX2
// Convert 16 RGB24 pixels (48 bytes) to 16 YJ values.
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
    src_rgb24 += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_AVX2

#ifdef HAS_RAWTOYJROW_AVX2
// Convert 16 RAW pixels (48 bytes) to 16 YJ values.
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
    src_raw += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RAWTOYJROW_AVX2

#ifdef HAS_RGB24TOYJROW_SSSE3
// Convert 16 RGB24 pixels (48 bytes) to 16 YJ values.
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
    src_rgb24 += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_SSSE3

#ifdef HAS_RAWTOYJROW_SSSE3
// Convert 16 RAW pixels (48 bytes) to 16 YJ values.
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
    src_raw += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RAWTOYJROW_SSSE3

// Scale samples by a constant and return the sum of squares of the inputs.
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float fsum = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    fsum += v * v;
    *dst++ = v * scale;
  }
  return fsum;
}

// Scale samples by a constant and return the maximum of the unscaled inputs.
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float fmax = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    float vs = v * scale;
    fmax = (v > fmax) ? v : fmax;
    *dst++ = vs;
  }
  return fmax;
}

// Scale samples by a constant.
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src++ * scale;
  }
}

// Filter a row of (width + 4) samples with 1, 4, 6, 4, 1 coefficients to
// produce width samples, with rounding.
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ =
        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8;
    ++src;
  }
}

// Filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}

// Filter a row of (width + 4) samples with 1, 4, 6, 4, 1 coefficients,
// normalized by 1/256, to produce width samples.
void GaussRow_F32_C(const float* src, float* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) *
             (1.0f / 256.0f);
    ++src;
  }
}

// Filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}
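
// Illustrative sketch (not part of libyuv; compiled out): the row and column
// passes above compose a separable 5x5 Gaussian. The 1-4-6-4-1 kernel sums
// to 16, so its 2D outer product sums to 256; the column pass accumulates
// unnormalized sums and the row pass divides by 256 (>> 8 with rounding in
// the integer path). The function name is hypothetical.
#if 0
static void GaussKernelExample(void) {
  static const int k[5] = {1, 4, 6, 4, 1};
  int sum2d = 0;
  int i, j;
  for (i = 0; i < 5; ++i) {
    for (j = 0; j < 5; ++j) {
      sum2d += k[i] * k[j];  // outer product of the separable kernel
    }
  }
  // sum2d == 256, which is why the final pass shifts right by 8.
  (void)sum2d;
}
#endif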

// Convert biplanar NV21 to packed YUV24.
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
    dst_yuv24[3] = src_vu[0];  // V
    dst_yuv24[4] = src_vu[1];  // U
    dst_yuv24[5] = src_y[1];   // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
  }
}

// Filter 2 rows of AYUV UV's (444) into UV (420).
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Output a row of UV values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    src_ayuv += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 1] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 0] + 2) >>
                2;
  }
}

// Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Output a row of VU values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    src_ayuv += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 0] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 1] + 2) >>
                2;
  }
}

// Copy row of AYUV Y's into Y.
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[2];  // v, u, y, a byte order
    src_ayuv += 4;
  }
}

// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[0];
    uint8_t v = src_uv[1];
    dst_vu[0] = v;
    dst_vu[1] = u;
    src_uv += 2;
    dst_vu += 2;
  }
}

void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_u[0] + src_u[1] + src_u[src_stride_u] +
                 src_u[src_stride_u + 1] + 2) >>
                2;
    dst_uv[1] = (src_v[0] + src_v[1] + src_v[src_stride_v] +
                 src_v[src_stride_v + 1] + 2) >>
                2;
    src_u += 2;
    src_v += 2;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_u[0] + src_u[src_stride_u] + 1) >> 1;
    dst_uv[1] = (src_v[0] + src_v[src_stride_v] + 1) >> 1;
  }
}
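
// Illustrative sketch (not part of libyuv; compiled out): HalfMergeUVRow_C
// box-filters 2x2 blocks of separate U and V planes into interleaved UV in
// one pass, as used when downsampling 444/422-style planes toward NV12.
// Values here are hypothetical.
#if 0
static void HalfMergeExample(void) {
  // Two rows of 2 U samples and 2 V samples -> one UV pair.
  const uint8_t u[2][2] = {{10, 20}, {30, 40}};  // average 25
  const uint8_t v[2][2] = {{50, 60}, {70, 80}};  // average 65
  uint8_t uv[2];
  HalfMergeUVRow_C(u[0], 2, v[0], 2, uv, 2);
  // uv[0] == (10 + 20 + 30 + 40 + 2) >> 2 == 25
  // uv[1] == (50 + 60 + 70 + 80 + 2) >> 2 == 65
}
#endif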

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif