12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479 |
- #include "jsimd_dspr2_asm.h"
- LEAF_DSPR2(jsimd_c_null_convert_dspr2) /* De-interleaves bytes: for every row and every component index ci, copies each s0-th input byte (starting at byte ci) into a contiguous output row of a0 bytes. Instructions written directly after a branch are treated as delay slots below; that assumes LEAF_DSPR2 selects noreorder mode -- confirm in jsimd_dspr2_asm.h. */
- SAVE_REGS_ON_STACK 8, s0, s1 /* macro from jsimd_dspr2_asm.h (not visible here); presumably lowers sp by 8 and spills s0/s1 -- confirm */
- lw t9, 24(sp) /* t9 = row counter; presumably the caller's first stack argument if sp was lowered by 8 -- TODO confirm */
- lw s0, 28(sp) /* s0 = component count, also used as the input byte stride; presumably the caller's second stack argument -- TODO confirm */
- andi t0, a0, 3 /* t0 = a0 mod 4: bytes that do not fit the 4-at-a-time loop */
- beqz t0, 4f /* a0 is a multiple of 4: use the variant without the byte-wise prologue */
- nop
- 0: /* per-row loop, variant with a byte-wise prologue */
- addiu t9, t9, -1
- bltz t9, 7f /* counter went negative: all rows done */
- li t1, 0 /* (delay slot) ci = 0 */
- 1: /* per-component loop */
- sll t3, t1, 2 /* t3 = ci * 4, byte offset of pointer number ci */
- lwx t5, t3(a2) /* t5 = a2[ci] */
- lw t2, 0(a1) /* t2 = a1[0], pointer to the current input row */
- sll t4, a3, 2 /* t4 = a3 * 4 */
- lwx t5, t4(t5) /* t5 = a2[ci][a3], pointer to the output row */
- addu t2, t2, t1 /* start reading at byte ci of the input row */
- addu s1, t5, a0 /* s1 = end of the output row */
- addu t6, t5, t0 /* t6 = end of the byte-wise prologue */
- 2: /* prologue: copy t0 bytes one at a time */
- lbu t3, 0(t2)
- addiu t5, t5, 1
- sb t3, -1(t5)
- bne t6, t5, 2b
- addu t2, t2, s0 /* (delay slot) step the input pointer by the stride */
- 3: /* main loop: four output bytes per pass. NOTE(review): entered unconditionally; when a0 < 4, t5 already equals s1 here and the first pass moves it past s1, so the bne exit test below is missed -- confirm callers never pass a0 < 4 */
- lbu t3, 0(t2)
- addu t4, t2, s0 /* t4, t7, t8 = addresses of the next three source bytes, each s0 apart */
- addu t7, t4, s0
- addu t8, t7, s0
- addu t2, t8, s0 /* t2 = source address for the next pass */
- lbu t4, 0(t4)
- lbu t7, 0(t7)
- lbu t8, 0(t8)
- addiu t5, t5, 4
- sb t3, -4(t5)
- sb t4, -3(t5)
- sb t7, -2(t5)
- bne s1, t5, 3b /* repeat until the output pointer reaches the row end */
- sb t8, -1(t5) /* (delay slot) fourth byte of this pass */
- addiu t1, t1, 1 /* ci++ */
- bne t1, s0, 1b /* next component until ci == s0 */
- nop
- addiu a1, a1, 4 /* advance to the next input row pointer */
- bgez t9, 0b /* t9 is non-negative here; the real exit test is the bltz at label 0 */
- addiu a3, a3, 1 /* (delay slot) next output row index */
- b 7f
- nop
- 4: /* per-row loop, variant for a0 that is a multiple of 4 */
- addiu t9, t9, -1
- bltz t9, 7f /* all rows done */
- li t1, 0 /* (delay slot) ci = 0 */
- 5: /* per-component loop */
- sll t3, t1, 2
- lwx t5, t3(a2) /* t5 = a2[ci] */
- lw t2, 0(a1) /* t2 = current input row */
- sll t4, a3, 2
- lwx t5, t4(t5) /* t5 = a2[ci][a3], the output row */
- addu t2, t2, t1 /* start at byte ci */
- addu s1, t5, a0 /* s1 = end of the output row */
- addu t6, t5, t0 /* t6 is computed but not read on this path */
- 6: /* four output bytes per pass, same body as loop 3 */
- lbu t3, 0(t2)
- addu t4, t2, s0
- addu t7, t4, s0
- addu t8, t7, s0
- addu t2, t8, s0
- lbu t4, 0(t4)
- lbu t7, 0(t7)
- lbu t8, 0(t8)
- addiu t5, t5, 4
- sb t3, -4(t5)
- sb t4, -3(t5)
- sb t7, -2(t5)
- bne s1, t5, 6b
- sb t8, -1(t5) /* (delay slot) fourth byte of this pass */
- addiu t1, t1, 1 /* ci++ */
- bne t1, s0, 5b
- nop
- addiu a1, a1, 4 /* next input row pointer */
- bgez t9, 4b
- addiu a3, a3, 1 /* (delay slot) next output row index */
- 7: /* common exit */
- RESTORE_REGS_FROM_STACK 8, s0, s1 /* counterpart of SAVE_REGS_ON_STACK above (macro not visible here) */
- j ra
- nop
- END(jsimd_c_null_convert_dspr2)
- .macro GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 colorid, pixel_size, \
- r_offs, g_offs, b_offs /* Emits jsimd_<colorid>_ycc_convert_dspr2: reads packed pixels of pixel_size bytes and writes one byte per pixel into each of three output planes using fixed-point weighted sums. Instructions directly after branches are treated as delay slots below (assumes LEAF_DSPR2 selects noreorder mode -- confirm in jsimd_dspr2_asm.h). */
- .macro DO_RGB_TO_YCC r, g, b, inptr /* loads the three colour bytes of one pixel, then advances inptr by one pixel */
- lbu \r, \r_offs(\inptr)
- lbu \g, \g_offs(\inptr)
- lbu \b, \b_offs(\inptr)
- addiu \inptr, \pixel_size
- .endm
- LEAF_DSPR2(jsimd_\colorid\()_ycc_convert_dspr2)
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 /* macro not visible here; presumably lowers sp by 32 and spills s0-s7 -- confirm */
- lw t7, 48(sp) /* t7 = row counter; presumably the caller's first stack argument if sp was lowered by 32 -- TODO confirm */
- li s0, 0x4c8b /* 19595, about 0.29900 * 65536; multiplies the r byte in the first sum */
- li s1, 0x9646 /* 38470, about 0.58700 * 65536; multiplies the g byte in the first sum */
- li s2, 0x1d2f /* 7471, about 0.11400 * 65536; multiplies the b byte in the first sum */
- li s3, 0xffffd4cd /* -11059 as a 32-bit value, about -0.16874 * 65536 */
- li s4, 0xffffab33 /* -21709 as a 32-bit value, about -0.33126 * 65536 */
- li s5, 0x8000 /* 32768 = 0.5 * 65536; used both as a weight and as the rounding seed of $ac0 */
- li s6, 0xffff94d1 /* -27439 as a 32-bit value, about -0.41869 * 65536 */
- li s7, 0xffffeb2f /* -5329 as a 32-bit value, about -0.08131 * 65536 */
- li t8, 0x807fff /* (128 << 16) + 0x7fff: seed for the second and third sums */
- 0: /* per-row loop */
- addiu t7, -1
- lw t6, 0(a1) /* t6 = a1[0], pointer to the current input row */
- lw t0, 0(a2) /* t0, t1, t2 = a2[0], a2[1], a2[2]: the three output plane row arrays */
- lw t1, 4(a2)
- lw t2, 8(a2)
- sll t3, a3, 2 /* t3 = a3 * 4 */
- lwx t0, t3(t0) /* t0, t1, t2 = row a3 of each output plane */
- lwx t1, t3(t1)
- lwx t2, t3(t2)
- addu t9, t2, a0 /* t9 = end of the third plane's row (a0 bytes) */
- addiu a3, 1 /* next output row index for the following pass */
- 1: /* per-pixel loop */
- DO_RGB_TO_YCC t3, t4, t5, t6 /* t3 = r, t4 = g, t5 = b; t6 advances by pixel_size */
- mtlo s5, $ac0 /* seed the low word of each accumulator (the high word is not written here) */
- mtlo t8, $ac1
- mtlo t8, $ac2
- maddu $ac0, s2, t5 /* the three sums are built interleaved: ac0 += s2*b */
- maddu $ac1, s5, t5 /* ac1 += s5*b */
- maddu $ac2, s5, t3 /* ac2 += s5*r */
- maddu $ac0, s0, t3 /* ac0 += s0*r */
- maddu $ac1, s3, t3 /* ac1 += s3*r */
- maddu $ac2, s6, t4 /* ac2 += s6*g */
- maddu $ac0, s1, t4 /* ac0 += s1*g */
- maddu $ac1, s4, t4 /* ac1 += s4*g */
- maddu $ac2, s7, t5 /* ac2 += s7*b */
- extr.w t3, $ac0, 16 /* drop the 16 fraction bits; only the low byte is stored below */
- extr.w t4, $ac1, 16
- extr.w t5, $ac2, 16
- sb t3, 0(t0) /* first plane (luma-like weights) */
- sb t4, 0(t1) /* second plane */
- sb t5, 0(t2) /* third plane */
- addiu t0, 1
- addiu t2, 1
- bne t2, t9, 1b /* until the third plane pointer reaches the row end */
- addiu t1, 1 /* (delay slot) advance the second plane pointer */
- bgtz t7, 0b /* more rows remaining */
- addiu a1, 4 /* (delay slot) next input row pointer */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_\colorid\()_ycc_convert_dspr2)
- .purgem DO_RGB_TO_YCC /* remove the helper so the next instantiation can redefine it */
- .endm
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extrgb, 3, 0, 1, 2 /* arguments: colorid, pixel_size, r_offs, g_offs, b_offs */
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extbgr, 3, 2, 1, 0
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extbgrx, 4, 2, 1, 0
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3
- .macro GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 colorid, pixel_size, \
- r_offs, g_offs, b_offs, a_offs /* Emits jsimd_ycc_<colorid>_convert_dspr2: reads one byte per pixel from each of three input planes and writes packed pixels of pixel_size bytes, clamping each channel to 0..255. Instructions directly after branches are treated as delay slots below (assumes LEAF_DSPR2 selects noreorder mode -- confirm in jsimd_dspr2_asm.h). */
- .macro STORE_YCC_TO_RGB scratch0 scratch1 scratch2 outptr /* stores one pixel (low byte of each scratch register), then advances outptr by pixel_size */
- sb \scratch0, \r_offs(\outptr)
- sb \scratch1, \g_offs(\outptr)
- sb \scratch2, \b_offs(\outptr)
- .if (\pixel_size == 4)
- li t0, 0xFF /* clobbers t0; the fourth byte of 4-byte pixels is filled with 0xFF */
- sb t0, \a_offs(\outptr)
- .endif
- addiu \outptr, \pixel_size
- .endm
- LEAF_DSPR2(jsimd_ycc_\colorid\()_convert_dspr2)
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 /* macro not visible here; presumably lowers sp by 32 and spills s0-s7 -- confirm */
- lw s1, 48(sp) /* s1 = row counter; presumably the caller's first stack argument if sp was lowered by 32 -- TODO confirm */
- li t3, 0x8000 /* rounding term added before the 16-bit right shift */
- li t4, 0x166e9 /* 91881, about 1.40200 * 65536 */
- li t5, 0x1c5a2 /* 116130, about 1.77200 * 65536 */
- li t6, 0xffff492e /* -46802 as a 32-bit value, about -0.71414 * 65536 */
- li t7, 0xffffa7e6 /* -22554 as a 32-bit value, about -0.34414 * 65536 */
- repl.ph t8, 128 /* t8 = 128 in both halfwords */
- 0: /* per-row loop */
- lw s0, 0(a3) /* s0 = a3[0], pointer to the current output row */
- lw t0, 0(a1) /* t0, t1, t2 = a1[0], a1[1], a1[2]: the three input plane row arrays */
- lw t1, 4(a1)
- lw t2, 8(a1)
- sll s5, a2, 2 /* s5 = a2 * 4 */
- addiu s1, -1
- lwx s2, s5(t0) /* s2, s3, s4 = row a2 of each input plane */
- lwx s3, s5(t1)
- lwx s4, s5(t2)
- addu t9, s2, a0 /* t9 = end of the first plane's row (a0 bytes) */
- addiu a2, 1 /* next input row index for the following pass */
- 1: /* per-pixel loop */
- lbu s7, 0(s4) /* s7 = third-plane sample (Cr-like, judging by the coefficients it meets) */
- lbu s6, 0(s3) /* s6 = second-plane sample (Cb-like) */
- lbu s5, 0(s2) /* s5 = first-plane sample, added to every channel below */
- addiu s2, 1
- addiu s4, 1
- addiu s7, -128 /* centre both chroma-like samples around zero */
- addiu s6, -128
- mul t2, t7, s6 /* t2 = t7 * s6 */
- mul t0, t6, s7 /* t0 = t6 * s7 */
- sll s7, 15 /* pre-scale so the fractional multiply yields (t4 * s7) / 65536, rounded */
- mulq_rs.w t1, t4, s7 /* t1 = first-channel offset */
- sll s6, 15
- addu t2, t3 /* add the rounding term */
- addu t2, t0 /* t2 = t7*s6 + t6*s7 + 0x8000 */
- mulq_rs.w t0, t5, s6 /* t0 = third-channel offset, (t5 * s6) / 65536 rounded */
- sra t2, 16 /* t2 = second-channel offset */
- addu t1, s5 /* first channel before clamping */
- addu t2, s5 /* second channel before clamping */
- ins t2, t1, 16, 16 /* pack: first channel in the upper halfword, second in the lower */
- subu.ph t2, t2, t8 /* clamp both halfwords to 0..255: subtract 128, saturating shift left 8, shift right 8, add 128 */
- addu t0, s5 /* third channel before clamping */
- shll_s.ph t2, t2, 8
- subu t0, 128 /* same clamp for the third channel using 32-bit saturating shifts */
- shra.ph t2, t2, 8
- shll_s.w t0, t0, 24
- addu.ph t2, t2, t8
- sra t0, t0, 24
- sra t1, t2, 16 /* t1 = clamped first channel */
- addiu t0, 128 /* t0 = clamped third channel */
- STORE_YCC_TO_RGB t1, t2, t0, s0 /* the low byte of t2 is the clamped second channel */
- bne s2, t9, 1b /* until the first plane pointer reaches the row end */
- addiu s3, 1 /* (delay slot) advance the second plane pointer */
- bgtz s1, 0b /* more rows remaining */
- addiu a3, 4 /* (delay slot) next output row pointer */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_ycc_\colorid\()_convert_dspr2)
- .purgem STORE_YCC_TO_RGB /* remove the helper so the next instantiation can redefine it */
- .endm
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extrgb, 3, 0, 1, 2, 3 /* arguments: colorid, pixel_size, r_offs, g_offs, b_offs, a_offs; a_offs is only used when pixel_size is 4 */
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extbgr, 3, 2, 1, 0, 3
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2, 3
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extbgrx, 4, 2, 1, 0, 3
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1, 0
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3, 0
- .macro GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 colorid, pixel_size, \
- r_offs, g_offs, b_offs /* Emits jsimd_<colorid>_gray_convert_dspr2: reads packed pixels of pixel_size bytes and writes one weighted-sum byte per pixel into a single output plane. Instructions directly after branches are treated as delay slots below (assumes LEAF_DSPR2 selects noreorder mode -- confirm in jsimd_dspr2_asm.h). */
- .macro DO_RGB_TO_GRAY r, g, b, inptr /* loads the three colour bytes of one pixel, then advances inptr by one pixel */
- lbu \r, \r_offs(\inptr)
- lbu \g, \g_offs(\inptr)
- lbu \b, \b_offs(\inptr)
- addiu \inptr, \pixel_size
- .endm
- LEAF_DSPR2(jsimd_\colorid\()_gray_convert_dspr2)
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 /* macro not visible here; presumably lowers sp by 32 and spills s0-s7 -- confirm */
- li s0, 0x4c8b /* 19595, about 0.29900 * 65536; weight of the r byte */
- li s1, 0x9646 /* 38470, about 0.58700 * 65536; weight of the g byte */
- li s2, 0x1d2f /* 7471, about 0.11400 * 65536; weight of the b byte */
- li s7, 0x8000 /* rounding seed loaded into the accumulator before each sum */
- lw s6, 48(sp) /* s6 = row counter; presumably the caller's first stack argument if sp was lowered by 32 -- TODO confirm */
- andi t7, a0, 3 /* t7 = a0 mod 4: pixels left for the one-at-a-time loop */
- 0: /* per-row loop */
- addiu s6, -1
- lw t0, 0(a1) /* t0 = a1[0], pointer to the current input row */
- lw t1, 0(a2) /* t1 = a2[0], row array of the output plane */
- sll t3, a3, 2 /* t3 = a3 * 4 */
- lwx t1, t3(t1) /* t1 = a2[0][a3], pointer to the output row */
- addiu a3, 1 /* next output row index for the following pass */
- addu t9, t1, a0 /* t9 = end of the output row */
- subu t8, t9, t7 /* t8 = end of the part handled four pixels at a time */
- beq t1, t8, 2f /* fewer than four pixels in the row: skip the unrolled loop */
- nop
- 1: /* unrolled loop: four pixels per pass, alternating between $ac0 and $ac1 */
- DO_RGB_TO_GRAY t3, t4, t5, t0 /* pixel 0 */
- DO_RGB_TO_GRAY s3, s4, s5, t0 /* pixel 1 */
- mtlo s7, $ac0
- maddu $ac0, s2, t5
- maddu $ac0, s1, t4
- maddu $ac0, s0, t3
- mtlo s7, $ac1
- maddu $ac1, s2, s5
- maddu $ac1, s1, s4
- maddu $ac1, s0, s3
- extr.w t6, $ac0, 16 /* t6 = result for pixel 0 */
- DO_RGB_TO_GRAY t3, t4, t5, t0 /* pixel 2 */
- DO_RGB_TO_GRAY s3, s4, s5, t0 /* pixel 3 */
- mtlo s7, $ac0
- maddu $ac0, s2, t5
- maddu $ac0, s1, t4
- extr.w t2, $ac1, 16 /* t2 = result for pixel 1, read before $ac1 is reseeded */
- maddu $ac0, s0, t3
- mtlo s7, $ac1
- maddu $ac1, s2, s5
- maddu $ac1, s1, s4
- maddu $ac1, s0, s3
- extr.w t5, $ac0, 16 /* t5 = result for pixel 2 */
- sb t6, 0(t1)
- sb t2, 1(t1)
- extr.w t3, $ac1, 16 /* t3 = result for pixel 3 */
- addiu t1, 4
- sb t5, -2(t1)
- sb t3, -1(t1)
- bne t1, t8, 1b
- nop
- 2:
- beqz t7, 4f /* no leftover pixels in this row */
- nop
- 3: /* leftover pixels, one per pass */
- DO_RGB_TO_GRAY t3, t4, t5, t0
- mtlo s7, $ac0
- maddu $ac0, s2, t5
- maddu $ac0, s1, t4
- maddu $ac0, s0, t3
- extr.w t6, $ac0, 16
- sb t6, 0(t1)
- addiu t1, 1
- bne t1, t9, 3b /* until the output pointer reaches the row end */
- nop
- 4:
- bgtz s6, 0b /* more rows remaining */
- addiu a1, 4 /* (delay slot) next input row pointer */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_\colorid\()_gray_convert_dspr2)
- .purgem DO_RGB_TO_GRAY /* remove the helper so the next instantiation can redefine it */
- .endm
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extrgb, 3, 0, 1, 2 /* arguments: colorid, pixel_size, r_offs, g_offs, b_offs */
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extbgr, 3, 2, 1, 0
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extbgrx, 4, 2, 1, 0
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3
- .macro GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 colorid, pixel_size, \
- r1_offs, g1_offs, \
- b1_offs, a1_offs, \
- r2_offs, g2_offs, \
- b2_offs, a2_offs /* Emits jsimd_h2v2_<colorid>_merged_upsample_dspr2: each pair of chroma-like samples is shared by a 2x2 group of first-plane samples and two output rows are written. Here pixel_size is the byte size of TWO output pixels (6 or 8 in the instantiations). Instructions directly after branches are treated as delay slots below (assumes LEAF_DSPR2 selects noreorder mode -- confirm in jsimd_dspr2_asm.h). */
- .macro STORE_H2V2_2_PIXELS scratch0 scratch1 scratch2 scratch3 scratch4 \
- scratch5 outptr /* stores two adjacent pixels, then advances outptr by pixel_size */
- sb \scratch0, \r1_offs(\outptr)
- sb \scratch1, \g1_offs(\outptr)
- sb \scratch2, \b1_offs(\outptr)
- sb \scratch3, \r2_offs(\outptr)
- sb \scratch4, \g2_offs(\outptr)
- sb \scratch5, \b2_offs(\outptr)
- .if (\pixel_size == 8)
- li \scratch0, 0xFF /* reuses scratch0, which has already been stored */
- sb \scratch0, \a1_offs(\outptr)
- sb \scratch0, \a2_offs(\outptr)
- .endif
- addiu \outptr, \pixel_size
- .endm
- .macro STORE_H2V2_1_PIXEL scratch0 scratch1 scratch2 outptr /* stores a single pixel; does not advance outptr */
- sb \scratch0, \r1_offs(\outptr)
- sb \scratch1, \g1_offs(\outptr)
- sb \scratch2, \b1_offs(\outptr)
- .if (\pixel_size == 8)
- li t0, 0xFF /* clobbers t0 */
- sb t0, \a1_offs(\outptr)
- .endif
- .endm
- LEAF_DSPR2(jsimd_h2v2_\colorid\()_merged_upsample_dspr2)
- SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra /* macro not visible here; presumably lowers sp by 40 and spills s0-s7 and ra (ra is used as a scratch register below) -- confirm */
- lw t9, 56(sp) /* t9 = base of a byte table indexed by (sample + channel offset); presumably the caller's first stack argument and a range-limit table -- TODO confirm */
- lw v0, 0(a1) /* v0, v1, t0 = a1[0], a1[1], a1[2]: the three input plane row arrays */
- lw v1, 4(a1)
- lw t0, 8(a1)
- sll t1, a2, 3 /* t1 = a2 * 8: byte offset of row pointer number 2*a2 */
- addiu t2, t1, 4 /* t2 = byte offset of row pointer number 2*a2 + 1 */
- sll t3, a2, 2 /* t3 = a2 * 4 */
- lw t4, 0(a3) /* t4 = a3[0], first output row */
- lwx t1, t1(v0) /* t1 = first-plane row 2*a2 */
- lwx t2, t2(v0) /* t2 = first-plane row 2*a2 + 1 */
- lwx t5, t3(v1) /* t5 = second-plane row a2 */
- lwx t6, t3(t0) /* t6 = third-plane row a2 */
- lw t7, 4(a3) /* t7 = a3[1], second output row */
- li s1, 0xe6ea /* base value for the two additions below */
- addiu t8, s1, 0x7fff /* t8 = 0x166e9 = 91881, about 1.40200 * 65536 */
- addiu s0, t8, 0x5eb9 /* s0 = 0x1c5a2 = 116130, about 1.77200 * 65536 */
- addiu s1, zero, 0xa7e6 /* presumably encoded as a sign-extended 16-bit immediate, giving s1 = 0xffffa7e6 (-22554, about -0.34414 * 65536) -- confirm assembler behaviour */
- xori s2, s1, 0xeec8 /* with that s1, s2 = 0xffff492e (-46802, about -0.71414 * 65536) */
- srl t3, a0, 1 /* t3 = a0 / 2: number of full two-column groups */
- blez t3, 2f /* none: go straight to the odd-column tail */
- addu t0, t5, t3 /* (delay slot) t0 = end of the second-plane row for the main loop */
- 1: /* main loop: one chroma-like pair, four output pixels */
- lbu t3, 0(t5) /* t3 = second-plane sample */
- lbu s3, 0(t6) /* s3 = third-plane sample */
- addiu t5, t5, 1
- addiu t3, t3, -128 /* centre both samples around zero */
- addiu s3, s3, -128
- mult $ac1, s1, t3 /* ac1 = s1*t3 + s2*s3 */
- madd $ac1, s2, s3
- sll s3, s3, 15 /* pre-scale so mulq_rs.w yields (coefficient * sample) / 65536, rounded */
- sll t3, t3, 15
- mulq_rs.w s4, t8, s3 /* s4 = offset for the first stored channel */
- extr_r.w s5, $ac1, 16 /* s5 = offset for the second stored channel (rounded) */
- mulq_rs.w s6, s0, t3 /* s6 = offset for the third stored channel */
- lbu v0, 0(t1) /* first-plane sample, upper row, left column */
- addiu t6, t6, 1
- addiu t1, t1, 2
- addu t3, v0, s4
- addu s3, v0, s5
- addu v1, v0, s6
- addu t3, t9, t3 /* turn the three sums into table addresses */
- addu s3, t9, s3
- addu v1, t9, v1
- lbu AT, 0(t3) /* table lookups; AT and ra serve as plain scratch registers here */
- lbu s7, 0(s3)
- lbu ra, 0(v1)
- lbu v0, -1(t1) /* first-plane sample, upper row, right column */
- addu t3, v0, s4
- addu s3, v0, s5
- addu v1, v0, s6
- addu t3, t9, t3
- addu s3, t9, s3
- addu v1, t9, v1
- lbu t3, 0(t3)
- lbu s3, 0(s3)
- lbu v1, 0(v1)
- lbu v0, 0(t2) /* first-plane sample, lower row, left column (loaded early) */
- STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t4 /* two pixels into the first output row */
- addu t3, v0, s4
- addu s3, v0, s5
- addu v1, v0, s6
- addu t3, t9, t3
- addu s3, t9, s3
- addu v1, t9, v1
- lbu AT, 0(t3)
- lbu s7, 0(s3)
- lbu ra, 0(v1)
- lbu v0, 1(t2) /* first-plane sample, lower row, right column */
- addiu t2, t2, 2
- addu t3, v0, s4
- addu s3, v0, s5
- addu v1, v0, s6
- addu t3, t9, t3
- addu s3, t9, s3
- addu v1, t9, v1
- lbu t3, 0(t3)
- lbu s3, 0(s3)
- lbu v1, 0(v1)
- STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t7 /* two pixels into the second output row */
- bne t0, t5, 1b /* until the second-plane pointer reaches t0 */
- nop
- 2: /* tail: one extra column when a0 is odd */
- andi t0, a0, 1
- beqz t0, 4f /* a0 even: done */
- lbu t3, 0(t5) /* (delay slot) second-plane sample; this load executes whether or not the branch is taken */
- lbu s3, 0(t6)
- addiu t3, t3, -128
- addiu s3, s3, -128
- mult $ac1, s1, t3
- madd $ac1, s2, s3
- sll s3, s3, 15
- sll t3, t3, 15
- lbu v0, 0(t1) /* first-plane sample, upper row */
- extr_r.w s5, $ac1, 16
- mulq_rs.w s4, t8, s3
- mulq_rs.w s6, s0, t3
- addu t3, v0, s4
- addu s3, v0, s5
- addu v1, v0, s6
- addu t3, t9, t3
- addu s3, t9, s3
- addu v1, t9, v1
- lbu t3, 0(t3)
- lbu s3, 0(s3)
- lbu v1, 0(v1)
- lbu v0, 0(t2) /* first-plane sample, lower row */
- STORE_H2V2_1_PIXEL t3, s3, v1, t4 /* last pixel of the first output row */
- addu t3, v0, s4
- addu s3, v0, s5
- addu v1, v0, s6
- addu t3, t9, t3
- addu s3, t9, s3
- addu v1, t9, v1
- lbu t3, 0(t3)
- lbu s3, 0(s3)
- lbu v1, 0(v1)
- STORE_H2V2_1_PIXEL t3, s3, v1, t7 /* last pixel of the second output row */
- 4:
- RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra /* also brings back the real return address into ra */
- j ra
- nop
- END(jsimd_h2v2_\colorid\()_merged_upsample_dspr2)
- .purgem STORE_H2V2_1_PIXEL /* remove both helpers so the next instantiation can redefine them */
- .purgem STORE_H2V2_2_PIXELS
- .endm
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 /* arguments: colorid, pixel_size, r1, g1, b1, a1, r2, g2, b2, a2 offsets; the a1/a2 offsets are only used when pixel_size is 8 */
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4
- .macro GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 colorid, pixel_size, \
- r1_offs, g1_offs, \
- b1_offs, a1_offs, \
- r2_offs, g2_offs, \
- b2_offs, a2_offs /* Emits jsimd_h2v1_<colorid>_merged_upsample_dspr2: each pair of chroma-like samples is shared by two horizontally adjacent first-plane samples and one output row is written. Here pixel_size is the byte size of TWO output pixels (6 or 8 in the instantiations). Instructions directly after branches are treated as delay slots below (assumes LEAF_DSPR2 selects noreorder mode -- confirm in jsimd_dspr2_asm.h). */
- .macro STORE_H2V1_2_PIXELS scratch0 scratch1 scratch2 scratch3 scratch4 \
- scratch5 outptr /* stores two adjacent pixels, then advances outptr by pixel_size */
- sb \scratch0, \r1_offs(\outptr)
- sb \scratch1, \g1_offs(\outptr)
- sb \scratch2, \b1_offs(\outptr)
- sb \scratch3, \r2_offs(\outptr)
- sb \scratch4, \g2_offs(\outptr)
- sb \scratch5, \b2_offs(\outptr)
- .if (\pixel_size == 8)
- li t0, 0xFF /* clobbers t0 */
- sb t0, \a1_offs(\outptr)
- sb t0, \a2_offs(\outptr)
- .endif
- addiu \outptr, \pixel_size
- .endm
- .macro STORE_H2V1_1_PIXEL scratch0 scratch1 scratch2 outptr /* stores a single pixel; does not advance outptr */
- sb \scratch0, \r1_offs(\outptr)
- sb \scratch1, \g1_offs(\outptr)
- sb \scratch2, \b1_offs(\outptr)
- .if (\pixel_size == 8)
- li t0, 0xFF /* clobbers t0 */
- sb t0, \a1_offs(\outptr)
- .endif
- .endm
- LEAF_DSPR2(jsimd_h2v1_\colorid\()_merged_upsample_dspr2)
- SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra /* macro not visible here; presumably lowers sp by 40 and spills s0-s7 and ra (ra is used as a scratch register below) -- confirm */
- li t0, 0xe6ea /* base value for the additions below */
- lw t1, 0(a1) /* t1, t2, t3 = a1[0], a1[1], a1[2]: the three input plane row arrays */
- lw t2, 4(a1)
- lw t3, 8(a1)
- lw t8, 56(sp) /* t8 = base of a byte table indexed by (sample + channel offset); presumably the caller's first stack argument and a range-limit table -- TODO confirm */
- addiu s1, t0, 0x7fff /* s1 = 0x166e9 = 91881, about 1.40200 * 65536 */
- addiu s2, s1, 0x5eb9 /* s2 = 0x1c5a2 = 116130, about 1.77200 * 65536 */
- addiu s0, t0, 0x9916 /* presumably a sign-extended 16-bit immediate, giving s0 = 0x8000 (rounding term used in the tail) -- confirm assembler behaviour */
- addiu s4, zero, 0xa7e6 /* presumably s4 = 0xffffa7e6 (-22554, about -0.34414 * 65536) -- same assumption */
- xori s3, s4, 0xeec8 /* with that s4, s3 = 0xffff492e (-46802, about -0.71414 * 65536) */
- srl t0, a0, 1 /* t0 = a0 / 2: number of full two-column groups */
- sll t4, a2, 2 /* t4 = a2 * 4 */
- lwx s5, t4(t1) /* s5 = first-plane row a2 */
- lwx s6, t4(t2) /* s6 = second-plane row a2 */
- lwx s7, t4(t3) /* s7 = third-plane row a2 */
- lw t7, 0(a3) /* t7 = a3[0], the output row */
- blez t0, 2f /* no full groups: go straight to the odd-column tail */
- addu t9, s6, t0 /* (delay slot) t9 = end of the second-plane row for the main loop */
- 1: /* main loop: one chroma-like pair, two output pixels */
- lbu t2, 0(s6) /* t2 = second-plane sample */
- lbu t0, 0(s7) /* t0 = third-plane sample */
- lbu t1, 0(s5) /* t1 = first-plane sample, left column */
- addiu t2, t2, -128 /* centre both chroma-like samples around zero */
- addiu t0, t0, -128
- mult $ac1, s4, t2 /* ac1 = s4*t2 + s3*t0 */
- madd $ac1, s3, t0
- sll t0, t0, 15 /* pre-scale so mulq_rs.w yields (coefficient * sample) / 65536, rounded */
- sll t2, t2, 15
- mulq_rs.w t0, s1, t0 /* t0 = offset for the first stored channel */
- extr_r.w t5, $ac1, 16 /* t5 = offset for the second stored channel (rounded) */
- mulq_rs.w t6, s2, t2 /* t6 = offset for the third stored channel */
- addiu s7, s7, 1
- addiu s6, s6, 1
- addu t2, t1, t0
- addu t3, t1, t5
- addu t4, t1, t6
- addu t2, t8, t2 /* turn the three sums into table addresses */
- addu t3, t8, t3
- addu t4, t8, t4
- lbu t1, 1(s5) /* first-plane sample, right column */
- lbu v0, 0(t2) /* table lookups for the left pixel; ra serves as a plain scratch register */
- lbu v1, 0(t3)
- lbu ra, 0(t4)
- addu t2, t1, t0
- addu t3, t1, t5
- addu t4, t1, t6
- addu t2, t8, t2
- addu t3, t8, t3
- addu t4, t8, t4
- lbu t2, 0(t2) /* table lookups for the right pixel */
- lbu t3, 0(t3)
- lbu t4, 0(t4)
- STORE_H2V1_2_PIXELS v0, v1, ra, t2, t3, t4, t7
- bne t9, s6, 1b /* until the second-plane pointer reaches t9 */
- addiu s5, s5, 2 /* (delay slot) advance the first-plane pointer by two samples */
- 2: /* tail: one extra column when a0 is odd */
- andi t0, a0, 1
- beqz t0, 4f /* a0 even: done */
- nop
- 3: /* no branch in this function targets this label */
- lbu t2, 0(s6)
- lbu t0, 0(s7)
- lbu t1, 0(s5)
- addiu t2, t2, -128
- addiu t0, t0, -128
- mul t3, s4, t2 /* the second-channel offset is built with mul/addu/sra here instead of the accumulator */
- mul t4, s3, t0
- sll t0, t0, 15
- sll t2, t2, 15
- mulq_rs.w t0, s1, t0
- mulq_rs.w t6, s2, t2
- addu t3, t3, s0 /* add the rounding term */
- addu t3, t4, t3
- sra t5, t3, 16
- addu t2, t1, t0
- addu t3, t1, t5
- addu t4, t1, t6
- addu t2, t8, t2
- addu t3, t8, t3
- addu t4, t8, t4
- lbu t2, 0(t2)
- lbu t3, 0(t3)
- lbu t4, 0(t4)
- STORE_H2V1_1_PIXEL t2, t3, t4, t7 /* last pixel of the output row */
- 4:
- RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra /* also brings back the real return address into ra */
- j ra
- nop
- END(jsimd_h2v1_\colorid\()_merged_upsample_dspr2)
- .purgem STORE_H2V1_1_PIXEL /* remove both helpers so the next instantiation can redefine them */
- .purgem STORE_H2V1_2_PIXELS
- .endm
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 /* arguments: colorid, pixel_size, r1, g1, b1, a1, r2, g2, b2, a2 offsets; the a1/a2 offsets are only used when pixel_size is 8 */
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4
- LEAF_DSPR2(jsimd_h2v2_fancy_upsample_dspr2) /* For each input row, writes two output rows with two bytes per input column. Each column sum is 3*current-row byte + neighbouring-row byte, and each output byte is (3*column sum + adjacent column sum + 8 or 7) >> 4. Instructions directly after branches are treated as delay slots below (assumes LEAF_DSPR2 selects noreorder mode -- confirm in jsimd_dspr2_asm.h). */
- SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 /* macro not visible here; presumably lowers sp by 24 and spills s0-s5 -- confirm */
- li s4, 0 /* s4 = byte offset into the output row pointer array (4 per output row) */
- lw s2, 0(a3) /* s2 = a3[0], the output row pointer array */
- 0: /* per-input-row loop */
- li t9, 2 /* two output rows per input row */
- lw s1, -4(a2) /* s1 = a2[-1]: neighbouring row for the first pass (the previous row pointer) */
- 1: /* one output row */
- lw s0, 0(a2) /* s0 = a2[0], the current input row */
- lwx s3, s4(s2) /* s3 = output row at byte offset s4 */
- addiu s5, a1, -2 /* s5 = a1 - 2: columns between the first and the last */
- srl t4, s5, 1
- sll t4, t4, 1 /* t4 = s5 rounded down to an even count (handled two at a time) */
- lbu t0, 0(s0) /* first two bytes of the current row ... */
- lbu t1, 1(s0)
- lbu t2, 0(s1) /* ... and of the neighbouring row */
- lbu t3, 1(s1)
- addiu s0, 2
- addiu s1, 2
- addu t8, s0, t4 /* t8 = end address for the two-column loop */
- andi s5, s5, 1 /* s5 = 1 if a single middle column is left over */
- sll t4, t0, 1
- sll t6, t1, 1
- addu t0, t0, t4 /* t0 = 3 * current[0] */
- addu t1, t1, t6 /* t1 = 3 * current[1] */
- addu t7, t0, t2 /* t7 = column sum 0 */
- addu t6, t1, t3 /* t6 = column sum 1 */
- sll t0, t7, 2 /* first column: left output is (4*sum0 + 8) >> 4 */
- subu t1, t0, t7 /* t1 = 3 * sum0 */
- shra_r.w t0, t0, 4
- addiu t1, 7
- addu t1, t1, t6
- srl t1, t1, 4 /* right output is (3*sum0 + sum1 + 7) >> 4 */
- sb t0, 0(s3)
- sb t1, 1(s3)
- beq t8, s0, 22f /* no middle column pairs */
- addiu s3, 2 /* (delay slot) two output bytes written */
- 2: /* two columns per pass; on entry t7 = previous column sum, t6 = current column sum */
- lh t0, 0(s0) /* two bytes from each row in one load; the low halfword is used as the earlier column, which assumes little-endian byte order -- TODO confirm */
- lh t2, 0(s1)
- addiu s0, 2
- addiu s1, 2
- preceu.ph.qbr t0, t0 /* widen the two low bytes to two halfwords */
- preceu.ph.qbr t2, t2
- shll.ph t1, t0, 1
- sll t3, t6, 1
- addu.ph t0, t1, t0 /* 3 * current, per halfword */
- addu t3, t3, t6 /* t3 = 3 * current column sum */
- addu.ph t0, t0, t2 /* t0 = the next two column sums, packed */
- addu t1, t3, t7 /* 3*current + previous */
- andi t7, t0, 0xFFFF /* t7 = first new column sum */
- sll t2, t7, 1
- addu t2, t7, t2 /* t2 = 3 * first new sum */
- addu t4, t2, t6 /* 3*first new + old current */
- srl t6, t0, 16 /* t6 = second new column sum */
- shra_r.w t1, t1, 4 /* rounded shift: (x + 8) >> 4 */
- addu t0, t3, t7 /* 3*old current + first new */
- addiu t0, 7
- srl t0, t0, 4
- shra_r.w t4, t4, 4
- addu t2, t2, t6 /* 3*first new + second new */
- addiu t2, 7
- srl t2, t2, 4
- sb t1, 0(s3)
- sb t0, 1(s3)
- sb t4, 2(s3)
- sb t2, 3(s3)
- bne t8, s0, 2b
- addiu s3, 4 /* (delay slot) four output bytes written */
- 22:
- beqz s5, 4f /* no single leftover column */
- addu t8, s0, s5 /* (delay slot) t8 = end address for the single-column loop */
- 3: /* one column per pass */
- lbu t0, 0(s0)
- lbu t2, 0(s1)
- addiu s0, 1
- addiu s1, 1
- sll t3, t6, 1
- sll t1, t0, 1
- addu t1, t0, t1 /* 3 * current-row byte */
- addu t3, t3, t6 /* t3 = 3 * current column sum */
- addu t5, t1, t2 /* t5 = new column sum */
- addu t1, t3, t7 /* 3*current + previous */
- shra_r.w t1, t1, 4
- addu t0, t3, t5 /* 3*current + new */
- addiu t0, 7
- srl t0, t0, 4
- sb t1, 0(s3)
- sb t0, 1(s3)
- addiu s3, 2
- move t7, t6 /* shift the sum window by one column */
- bne t8, s0, 3b
- move t6, t5 /* (delay slot) */
- 4: /* last column: t6 = its sum, t7 = the sum before it */
- sll t0, t6, 2 /* t0 = 4 * last sum */
- subu t1, t0, t6 /* t1 = 3 * last sum */
- addu t1, t1, t7
- addiu s4, 4 /* step to the next output row pointer */
- shra_r.w t1, t1, 4 /* (3*last + previous + 8) >> 4 */
- addiu t0, 7
- srl t0, t0, 4 /* (4*last + 7) >> 4 */
- sb t1, 0(s3)
- sb t0, 1(s3)
- addiu t9, -1
- addiu s3, 2
- bnez t9, 1b /* second pass for the same input row */
- lw s1, 4(a2) /* (delay slot) neighbouring row for the second pass: a2[1], the next row pointer */
- srl t0, s4, 2 /* t0 = number of output rows written so far */
- subu t0, a0, t0
- bgtz t0, 0b /* continue while fewer than a0 output rows have been written */
- addiu a2, 4 /* (delay slot) next input row pointer */
- RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5
- j ra
- nop
- END(jsimd_h2v2_fancy_upsample_dspr2)
- LEAF_DSPR2(jsimd_h2v1_fancy_upsample_dspr2)  /* Doubles each row horizontally: every input sample yields two output bytes, each a 3:1 blend of the sample with its left or right neighbour. a0 = row count, a1 = input columns, a2 = input row-pointer array, a3 = address holding the output row-pointer array. */
- SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
- .set at
- beqz a0, 3f  /* no rows: return immediately */
- sll t0, a0, 2  /* (delay slot) byte size of a0 row pointers */
- lw s1, 0(a3)  /* s1 = cursor in the output row-pointer array */
- li s3, 0x10001  /* packed halfword pair (1, 1), used as a per-lane bias */
- addu s0, s1, t0  /* s0 = end of the output row-pointer array */
- 0:  /* ---- per row ---- */
- addiu t8, a1, -2  /* columns between the special first and last ones */
- srl t9, t8, 2  /* t9 = iterations of the four-column loop */
- lw t7, 0(a2)  /* t7 = input row */
- lw s2, 0(s1)  /* s2 = output row */
- lbu t0, 0(t7)
- lbu t1, 1(t7)
- sll t2, t0, 1
- addu t2, t2, t0
- addu t2, t2, t1
- shra_r.w t2, t2, 2  /* (3 * in[0] + in[1] + 2) >> 2 */
- sb t0, 0(s2)  /* first output byte is the first input sample unchanged */
- sb t2, 1(s2)
- beqz t9, 11f
- addiu s2, 2  /* (delay slot) */
- 1:  /* ---- four input columns -> eight output bytes, packed-halfword arithmetic ---- */
- ulw t0, 0(t7)  /* unaligned loads: the window starts one sample to the left of the columns being expanded */
- ulw t1, 1(t7)
- ulh t2, 4(t7)
- preceu.ph.qbl t3, t0
- preceu.ph.qbr t0, t0
- preceu.ph.qbr t2, t2
- preceu.ph.qbl t4, t1
- preceu.ph.qbr t1, t1
- shll.ph t5, t4, 1
- shll.ph t6, t1, 1
- addu.ph t5, t5, t4  /* 3 * sample, upper pair */
- addu.ph t6, t6, t1  /* 3 * sample, lower pair */
- addu.ph t4, t3, s3  /* neighbour + 1 */
- addu.ph t0, t0, s3
- addu.ph t4, t4, t5
- addu.ph t0, t0, t6
- shrl.ph t4, t4, 2  /* (3 * s + neighbour + 1) >> 2 */
- shrl.ph t0, t0, 2
- addu.ph t2, t2, t5
- addu.ph t3, t3, t6
- shra_r.ph t2, t2, 2  /* (3 * s + neighbour + 2) >> 2 via rounding shift */
- shra_r.ph t3, t3, 2
- shll.ph t2, t2, 8  /* move these results into the odd byte lanes */
- shll.ph t3, t3, 8
- or t2, t4, t2  /* interleave the two result sets */
- or t3, t3, t0
- addiu t9, -1
- usw t3, 0(s2)
- usw t2, 4(s2)
- addiu s2, 8
- bgtz t9, 1b
- addiu t7, 4  /* (delay slot) */
- 11:
- andi t8, 3  /* columns left over after the four-column loop */
- beqz t8, 22f
- addiu t7, 1  /* (delay slot) t7 now points at the next column to expand */
- 2:  /* ---- one column per iteration ---- */
- lbu t0, 0(t7)
- addiu t7, 1
- sll t1, t0, 1
- addu t2, t0, t1  /* t2 = 3 * current sample */
- lbu t3, -2(t7)  /* left neighbour (t7 was already advanced) */
- lbu t4, 0(t7)  /* right neighbour */
- addiu t3, 1
- addiu t4, 2
- addu t3, t3, t2
- addu t4, t4, t2
- srl t3, 2  /* (3 * cur + left + 1) >> 2 */
- srl t4, 2  /* (3 * cur + right + 2) >> 2 */
- sb t3, 0(s2)
- sb t4, 1(s2)
- addiu t8, -1
- bgtz t8, 2b
- addiu s2, 2  /* (delay slot) */
- 22:  /* ---- last column ---- */
- lbu t0, 0(t7)
- lbu t2, -1(t7)
- sll t1, t0, 1
- addu t1, t1, t0
- addu t1, t1, t2
- addiu t1, 1
- srl t1, t1, 2  /* (3 * last + left + 1) >> 2 */
- sb t1, 0(s2)
- sb t0, 1(s2)  /* final output byte is the last input sample unchanged */
- addiu s1, 4
- bne s1, s0, 0b
- addiu a2, 4  /* (delay slot) next input row pointer */
- 3:
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
- j ra
- nop
- END(jsimd_h2v1_fancy_upsample_dspr2)
- LEAF_DSPR2(jsimd_h2v1_downsample_dspr2)  /* Averages each horizontal pair of input samples into one output byte (rounding bias alternates 0, 1, 0, 1, ...), then fills the rest of the output row with the last input sample. a0 = input width in samples, a2 = number of rows, a3 * 8 = output row width in bytes; the input and output row-pointer arrays are read from the caller's stack area. */
- .set at
- SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4
- beqz a2, 7f  /* zero rows: nothing to do */
- lw s1, 44(sp)  /* (delay slot) s1 = output row-pointer array */
- lw s0, 40(sp)  /* s0 = input row-pointer array */
- andi t9, a0, 2  /* loop-invariant: non-zero when the fill ends with one single byte (see 5:) */
- 0:  /* ---- per row ---- */
- srl s2, a0, 2  /* s2 = number of fill halfwords = (a3 * 8) / 2 - (a0 >> 2) - ((a0 & 2) >> 1). Recomputed for every row: the fill loop at 4: counts s2 down to zero, so a value computed once before the row loop left all rows after the first without right-edge fill. */
- srl t7, t9, 1
- addu s2, t7, s2
- sll t0, a3, 3
- srl t7, t0, 1
- subu s2, t7, s2
- andi t6, a0, 1
- addiu t6, -1  /* t6 = index of the last real input sample relative to t5 after the pair loops: -1 for even a0, 0 for odd a0 */
- lw t4, 0(s1)  /* t4 = output cursor */
- lw t5, 0(s0)  /* t5 = input cursor */
- li s3, 0  /* s3 = rounding bias for the single-pair loop, toggles 0 <-> 1 */
- srl t7, a0, 1  /* t7 = number of output samples produced from real pairs */
- srl s4, t7, 2  /* s4 = iterations of the four-output loop */
- blez s4, 11f  /* loop 1: is a do-while; skip it when fewer than four outputs exist, otherwise it would emit four outputs regardless and misplace everything after them */
- andi t8, t7, 3  /* (delay slot) t8 = outputs left for the single-pair loop */
- 1:  /* ---- four outputs (eight inputs) per iteration ---- */
- ulhu t0, 0(t5)
- ulhu t1, 2(t5)
- ulhu t2, 4(t5)
- ulhu t3, 6(t5)
- raddu.w.qb t0, t0  /* sum of the two bytes of the pair */
- raddu.w.qb t1, t1
- raddu.w.qb t2, t2
- raddu.w.qb t3, t3
- shra.ph t0, t0, 1  /* (a + b) >> 1 */
- shra_r.ph t1, t1, 1  /* (a + b + 1) >> 1 */
- shra.ph t2, t2, 1
- shra_r.ph t3, t3, 1
- sb t0, 0(t4)
- sb t1, 1(t4)
- sb t2, 2(t4)
- sb t3, 3(t4)
- addiu s4, -1
- addiu t4, 4
- bgtz s4, 1b
- addiu t5, 8  /* (delay slot) */
- 11:
- beqz t8, 3f
- addu s4, t4, t8  /* (delay slot) s4 = output address where the single-pair loop stops */
- 2:  /* ---- one output per iteration ---- */
- ulhu t0, 0(t5)
- raddu.w.qb t0, t0
- addqh.w t0, t0, s3  /* (a + b + bias) >> 1 */
- xori s3, s3, 1
- sb t0, 0(t4)
- addiu t4, 1
- bne t4, s4, 2b
- addiu t5, 2  /* (delay slot) */
- 3:  /* ---- build the fill value from the last real input sample ---- */
- lbux t1, t6(t5)
- sll t1, 1
- addqh.w t2, t1, s3
- xori s3, s3, 1
- addqh.w t3, t1, s3
- blez s2, 5f
- append t3, t2, 8  /* (delay slot) t3 = two fill bytes packed into one halfword */
- addu t5, t4, s2
- 4:  /* ---- halfword fill ---- */
- ush t3, 0(t4)
- addiu s2, -1
- bgtz s2, 4b
- addiu t4, 2  /* (delay slot) */
- 5:
- beqz t9, 6f
- nop
- sb t2, 0(t4)  /* odd number of fill bytes: write the final single byte */
- 6:
- addiu s1, 4  /* next output row pointer */
- addiu a2, -1
- bnez a2, 0b
- addiu s0, 4  /* (delay slot) next input row pointer */
- 7:
- RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4
- j ra
- nop
- END(jsimd_h2v1_downsample_dspr2)
- LEAF_DSPR2(jsimd_h2v2_downsample_dspr2)  /* Averages each 2x2 block taken from two consecutive input rows into one output byte (rounding bias alternates 1, 2, 1, 2, ...), then fills the rest of the output row. a0 = input width in samples, a2 = number of output rows, a3 * 8 = output row width in bytes; the input and output row-pointer arrays are read from the caller's stack area. */
- .set at
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- beqz a2, 8f  /* zero rows: nothing to do */
- lw s1, 52(sp)  /* (delay slot) s1 = output row-pointer array */
- lw s0, 48(sp)  /* s0 = input row-pointer array */
- andi t6, a0, 1
- addiu t6, -1  /* t6 = index of the last real input sample relative to the input cursors after the block loops (loop-invariant) */
- andi t9, a0, 2  /* loop-invariant: non-zero when the fill ends with one single byte (see 6:) */
- 0:  /* ---- per output row ---- */
- srl t7, a0, 1  /* Per-row state is rebuilt here on every pass. s4 and s2 are counted down inside the row and t8 is overwritten with an end address, so computing them once before the row loop made every row after the first run with stale counters and a garbage end pointer. */
- srl s4, t7, 2  /* s4 = iterations of the four-output loop */
- andi t8, t7, 3  /* t8 = outputs left for the single-block loop */
- srl s2, a0, 2  /* s2 = number of fill halfwords = (a3 * 8) / 2 - (a0 >> 2) - ((a0 & 2) >> 1) */
- srl t7, t9, 1
- addu s2, t7, s2
- sll t0, a3, 3
- srl t7, t0, 1
- subu s2, t7, s2
- lw t4, 0(s1)  /* t4 = output cursor */
- lw t5, 0(s0)  /* t5 = upper input row cursor */
- lw s7, 4(s0)  /* s7 = lower input row cursor */
- blez s4, 1f  /* loop 2: is a do-while; skip it when fewer than four outputs exist */
- li s6, 1  /* (delay slot) s6 = rounding bias for the single-block loop, toggles 1 <-> 2 */
- 2:  /* ---- four outputs (two rows x eight inputs) per iteration ---- */
- ulw t0, 0(t5)
- ulw t1, 0(s7)
- ulw t2, 4(t5)
- ulw t3, 4(s7)
- precrq.ph.w t7, t0, t1  /* t7 = upper halfwords of both rows -> second 2x2 block */
- ins t0, t1, 16, 16  /* t0 = lower halfwords of both rows -> first 2x2 block */
- raddu.w.qb t1, t7
- raddu.w.qb t0, t0
- shra_r.w t1, t1, 2  /* (sum + 2) >> 2 */
- addiu t0, 1
- srl t0, 2  /* (sum + 1) >> 2 */
- precrq.ph.w t7, t2, t3
- ins t2, t3, 16, 16
- raddu.w.qb t7, t7
- raddu.w.qb t2, t2
- shra_r.w t7, t7, 2
- addiu t2, 1
- srl t2, 2
- sb t0, 0(t4)
- sb t1, 1(t4)
- sb t2, 2(t4)
- sb t7, 3(t4)
- addiu t4, 4
- addiu t5, 8
- addiu s4, s4, -1
- bgtz s4, 2b
- addiu s7, 8  /* (delay slot) */
- 1:
- beqz t8, 4f
- addu t8, t4, t8  /* (delay slot) t8 becomes the output address where the single-block loop stops */
- 3:  /* ---- one output per iteration ---- */
- ulhu t0, 0(t5)
- ulhu t1, 0(s7)
- ins t0, t1, 16, 16
- raddu.w.qb t0, t0
- addu t0, t0, s6
- srl t0, 2  /* (sum + bias) >> 2 */
- xori s6, s6, 3
- sb t0, 0(t4)
- addiu t5, 2
- addiu t4, 1
- bne t8, t4, 3b
- addiu s7, 2  /* (delay slot) */
- 4:  /* ---- build the fill value from the last real sample of both rows ---- */
- lbux t1, t6(t5)
- sll t1, 1
- lbux t0, t6(s7)
- sll t0, 1
- addu t1, t1, t0
- addu t3, t1, s6
- srl t0, t3, 2
- xori s6, s6, 3
- addu t2, t1, s6
- srl t1, t2, 2
- blez s2, 6f
- append t1, t0, 8  /* (delay slot) t1 = two fill bytes packed into one halfword */
- 5:  /* ---- halfword fill ---- */
- ush t1, 0(t4)
- addiu s2, -1
- bgtz s2, 5b
- addiu t4, 2  /* (delay slot) */
- 6:
- beqz t9, 7f
- nop
- sb t0, 0(t4)  /* odd number of fill bytes: write the final single byte */
- 7:
- addiu s1, 4  /* next output row pointer */
- addiu a2, -1
- bnez a2, 0b
- addiu s0, 8  /* (delay slot) skip two input row pointers */
- 8:
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_h2v2_downsample_dspr2)
- LEAF_DSPR2(jsimd_h2v2_smooth_downsample_dspr2)  /* 2x2 downsample with neighbourhood smoothing: each output byte is a weighted sum of the 2x2 block and the 12 samples surrounding it. a0 = input row-pointer array, a1 = output row-pointer array, a2 = number of output rows, a3 + 2 = rows to right-extend; three further values are read from the caller's stack area. */
- .set at
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- lw s7, 52(sp)  /* stack argument, scaled by 8 below to give the number of output columns */
- lw s0, 56(sp)  /* stack argument: number of valid input columns (used only by the edge fill below) */
- lw s6, 48(sp)  /* stack argument: smoothing strength used to derive both weights at 2: */
- sll s7, 3
- sll v0, s7, 1  /* input columns required = 2 * output columns */
- subu v0, v0, s0  /* v0 = columns missing on the right */
- blez v0, 2f  /* rows are already wide enough */
- move v1, zero  /* (delay slot) row counter */
- addiu t0, a3, 2  /* number of rows to extend */
- 0:  /* ---- right-edge fill, starting from the row pointer stored just before a0[0] ---- */
- addiu t1, a0, -4
- sll t2, v1, 2
- lwx t1, t2(t1)
- move t3, v0
- addu t1, t1, s0  /* first missing column */
- lbu t2, -1(t1)  /* last valid sample of the row */
- 1:
- addiu t3, t3, -1
- sb t2, 0(t1)
- bgtz t3, 1b
- addiu t1, t1, 1  /* (delay slot) */
- addiu v1, v1, 1
- bne v1, t0, 0b
- nop
- 2:
- li v0, 80
- mul v0, s6, v0
- li v1, 16384
- move t4, zero  /* t4 = output row index */
- move t5, zero  /* t5 = input row index */
- subu t6, v1, v0  /* t6 = 16384 - 80 * s6 : weight of the 2x2 block sum */
- sll t7, s6, 4  /* t7 = 16 * s6 : weight of the neighbour sum */
- 3:  /* ---- per output row ---- */
- sll v0, t4, 2
- lwx t8, v0(a1)  /* t8 = output cursor */
- sll v1, t5, 2
- addiu t9, v1, 4
- addiu s0, v1, -4
- addiu s1, v1, 8
- lwx s2, v1(a0)  /* s2 = upper row of the 2x2 block */
- lwx t9, t9(a0)  /* t9 = lower row of the 2x2 block */
- lwx s0, s0(a0)  /* s0 = row above the block */
- lwx s1, s1(a0)  /* s1 = row below the block */
- lh v0, 0(s2)  /* ---- first output column: offset 0 stands in for the missing left neighbour ---- */
- lh v1, 0(t9)
- lh t0, 0(s0)
- lh t1, 0(s1)
- ins v0, v1, 16, 16
- ins t0, t1, 16, 16
- raddu.w.qb t2, v0  /* t2 = sum of the four block samples */
- raddu.w.qb s3, t0  /* s3 = the two samples directly above plus the two directly below */
- lbu v0, 0(s2)
- lbu v1, 2(s2)
- lbu t0, 0(t9)
- lbu t1, 2(t9)
- addu v0, v0, v1
- mult $ac1, t2, t6  /* ac1 = block sum * t6 */
- addu t0, t0, t1
- lbu t2, 2(s0)
- addu t0, t0, v0
- lbu t3, 2(s1)
- addu s3, t0, s3
- lbu v0, 0(s0)
- lbu t0, 0(s1)
- sll s3, s3, 1  /* edge neighbours count twice */
- addu v0, v0, t2
- addu t0, t0, t3
- addu t0, t0, v0
- addu s3, t0, s3  /* plus the four corner neighbours once */
- madd $ac1, s3, t7  /* ac1 += neighbour sum * t7 */
- extr_r.w v0, $ac1, 16  /* rounded >> 16 */
- addiu t8, t8, 1
- addiu s2, s2, 2
- addiu t9, t9, 2
- addiu s0, s0, 2
- addiu s1, s1, 2
- sb v0, -1(t8)
- addiu s4, s7, -2
- and s4, s4, 3  /* s4 = (columns - 2) mod 4, handled one at a time at 4: */
- addu s5, s4, t8  /* end address for loop 4: */
- 4:  /* ---- one interior column per iteration ---- */
- lh v0, 0(s2)
- lh v1, 0(t9)
- lh t0, 0(s0)
- lh t1, 0(s1)
- ins v0, v1, 16, 16
- ins t0, t1, 16, 16
- raddu.w.qb t2, v0
- raddu.w.qb s3, t0
- lbu v0, -1(s2)  /* left neighbour */
- lbu v1, 2(s2)  /* right neighbour */
- lbu t0, -1(t9)
- lbu t1, 2(t9)
- addu v0, v0, v1
- mult $ac1, t2, t6
- addu t0, t0, t1
- lbu t2, 2(s0)
- addu t0, t0, v0
- lbu t3, 2(s1)
- addu s3, t0, s3
- lbu v0, -1(s0)
- lbu t0, -1(s1)
- sll s3, s3, 1
- addu v0, v0, t2
- addu t0, t0, t3
- addu t0, t0, v0
- addu s3, t0, s3
- madd $ac1, s3, t7
- extr_r.w t2, $ac1, 16
- addiu t8, t8, 1
- addiu s2, s2, 2
- addiu t9, t9, 2
- addiu s0, s0, 2
- sb t2, -1(t8)
- bne s5, t8, 4b
- addiu s1, s1, 2  /* (delay slot) */
- addiu s5, s7, -2
- subu s5, s5, s4
- addu s5, s5, t8  /* end address for the four-column loop below */
- 5:  /* ---- four interior columns per iteration; same formula, software-pipelined ---- */
- lh v0, 0(s2)
- lh v1, 0(t9)
- lh t0, 0(s0)
- lh t1, 0(s1)
- ins v0, v1, 16, 16
- ins t0, t1, 16, 16
- raddu.w.qb t2, v0
- raddu.w.qb s3, t0
- lbu v0, -1(s2)
- lbu v1, 2(s2)
- lbu t0, -1(t9)
- lbu t1, 2(t9)
- addu v0, v0, v1
- mult $ac1, t2, t6
- addu t0, t0, t1
- lbu t2, 2(s0)
- addu t0, t0, v0
- lbu t3, 2(s1)
- addu s3, t0, s3
- lbu v0, -1(s0)
- lbu t0, -1(s1)
- sll s3, s3, 1
- addu v0, v0, t2
- addu t0, t0, t3
- lh v1, 2(t9)  /* loads for the second column are interleaved from here */
- addu t0, t0, v0
- lh v0, 2(s2)
- addu s3, t0, s3
- lh t0, 2(s0)
- lh t1, 2(s1)
- madd $ac1, s3, t7
- extr_r.w t2, $ac1, 16  /* result of column 0 */
- ins t0, t1, 16, 16
- ins v0, v1, 16, 16
- raddu.w.qb s3, t0
- lbu v1, 4(s2)
- lbu t0, 1(t9)
- lbu t1, 4(t9)
- sb t2, 0(t8)
- raddu.w.qb t3, v0
- lbu v0, 1(s2)
- addu t0, t0, t1
- mult $ac1, t3, t6
- addu v0, v0, v1
- lbu t2, 4(s0)
- addu t0, t0, v0
- lbu v0, 1(s0)
- addu s3, t0, s3
- lbu t0, 1(s1)
- lbu t3, 4(s1)
- addu v0, v0, t2
- sll s3, s3, 1
- addu t0, t0, t3
- lh v1, 4(t9)  /* loads for the third column */
- addu t0, t0, v0
- lh v0, 4(s2)
- addu s3, t0, s3
- lh t0, 4(s0)
- lh t1, 4(s1)
- madd $ac1, s3, t7
- extr_r.w t2, $ac1, 16  /* result of column 1 */
- ins t0, t1, 16, 16
- ins v0, v1, 16, 16
- raddu.w.qb s3, t0
- lbu v1, 6(s2)
- lbu t0, 3(t9)
- lbu t1, 6(t9)
- sb t2, 1(t8)
- raddu.w.qb t3, v0
- lbu v0, 3(s2)
- addu t0, t0, t1
- mult $ac1, t3, t6
- addu v0, v0, v1
- lbu t2, 6(s0)
- addu t0, t0, v0
- lbu v0, 3(s0)
- addu s3, t0, s3
- lbu t0, 3(s1)
- lbu t3, 6(s1)
- addu v0, v0, t2
- sll s3, s3, 1
- addu t0, t0, t3
- lh v1, 6(t9)  /* loads for the fourth column */
- addu t0, t0, v0
- lh v0, 6(s2)
- addu s3, t0, s3
- lh t0, 6(s0)
- lh t1, 6(s1)
- madd $ac1, s3, t7
- extr_r.w t3, $ac1, 16  /* result of column 2 */
- ins t0, t1, 16, 16
- ins v0, v1, 16, 16
- raddu.w.qb s3, t0
- lbu v1, 8(s2)
- lbu t0, 5(t9)
- lbu t1, 8(t9)
- sb t3, 2(t8)
- raddu.w.qb t2, v0
- lbu v0, 5(s2)
- addu t0, t0, t1
- mult $ac1, t2, t6
- addu v0, v0, v1
- lbu t2, 8(s0)
- addu t0, t0, v0
- lbu v0, 5(s0)
- addu s3, t0, s3
- lbu t0, 5(s1)
- lbu t3, 8(s1)
- addu v0, v0, t2
- sll s3, s3, 1
- addu t0, t0, t3
- addiu t8, t8, 4
- addu t0, t0, v0
- addiu s2, s2, 8
- addu s3, t0, s3
- addiu t9, t9, 8
- madd $ac1, s3, t7
- extr_r.w t1, $ac1, 16  /* result of column 3 */
- addiu s0, s0, 8
- addiu s1, s1, 8
- bne s5, t8, 5b
- sb t1, -1(t8)  /* (delay slot) */
- lh v0, 0(s2)  /* ---- last output column: offset 1 stands in for the missing right neighbour ---- */
- lh v1, 0(t9)
- lh t0, 0(s0)
- lh t1, 0(s1)
- ins v0, v1, 16, 16
- ins t0, t1, 16, 16
- raddu.w.qb t2, v0
- raddu.w.qb s3, t0
- lbu v0, -1(s2)
- lbu v1, 1(s2)
- lbu t0, -1(t9)
- lbu t1, 1(t9)
- addu v0, v0, v1
- mult $ac1, t2, t6
- addu t0, t0, t1
- lbu t2, 1(s0)
- addu t0, t0, v0
- lbu t3, 1(s1)
- addu s3, t0, s3
- lbu v0, -1(s0)
- lbu t0, -1(s1)
- sll s3, s3, 1
- addu v0, v0, t2
- addu t0, t0, t3
- addu t0, t0, v0
- addu s3, t0, s3
- madd $ac1, s3, t7
- extr_r.w t0, $ac1, 16
- addiu t5, t5, 2  /* NOTE(review): together with the delay slot below, the input row index advances by 4 per output row, although each output row consumes only the two block rows t5 and t5 + 1. This only matters when a2 > 1; confirm the intended stride. */
- sb t0, 0(t8)
- addiu t4, t4, 1
- bne t4, a2, 3b
- addiu t5, t5, 2  /* (delay slot) second increment of the input row index */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_h2v2_smooth_downsample_dspr2)
- LEAF_DSPR2(jsimd_int_upsample_dspr2)  /* Replicates every input byte a0 times horizontally into an output row, then copies that output row into a1 - 1 following rows. a2 = input row-pointer array, a3 = address holding the output row-pointer array; the output width and the output row count are read from the caller's stack area. */
- .set at
- SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
- lw s0, 0(a3)  /* s0 = output row-pointer array */
- lw s1, 32(sp)  /* s1 = stack argument: output row width in bytes */
- lw s2, 36(sp)  /* s2 = stack argument: loop bound compared against s3 */
- li t6, 0  /* t6 = input row offset */
- beqz s2, 10f
- li s3, 0  /* (delay slot) s3 = output row offset */
- 0:  /* ---- per input row ---- */
- addu t0, a2, t6  /* NOTE(review): t6 grows by 1 and s3 by a1 per pass, yet both are added unscaled as byte offsets to pointer arrays that are read with lw; confirm the intended element scaling */
- addu t7, s0, s3
- lw t3, 0(t0)  /* t3 = input cursor */
- lw t8, 0(t7)  /* t8 = output cursor */
- beqz s1, 4f
- addu t5, t8, s1  /* (delay slot) t5 = end of the output row */
- 1:  /* ---- horizontal replication ---- */
- lb t2, 0(t3)
- addiu t3, 1
- beqz a0, 3f
- move t0, a0  /* (delay slot) copies left for this sample */
- 2:
- sb t2, 0(t8)
- addiu t0, -1
- bgtz t0, 2b
- addiu t8, 1  /* (delay slot) */
- 3:
- bgt t5, t8, 1b  /* assembler macro branch (needs .set at) */
- nop
- 4:  /* ---- vertical replication: a1 - 1 row copies ---- */
- addiu t9, a1, -1
- blez t9, 9f
- nop
- 5:
- lw t3, 0(s0)  /* source row */
- lw t4, 4(s0)  /* destination row */
- subu t0, s1, 0xF  /* rows shorter than 16 bytes use the byte loop only */
- blez t0, 7f
- addu t5, t3, s1  /* (delay slot) t5 = end of the source row */
- andi t7, s1, 0xF  /* bytes left after the 16-byte chunks */
- subu t8, t5, t7  /* end of the 16-byte chunk region */
- 6:  /* ---- 16 bytes per iteration, unaligned-safe ---- */
- ulw t0, 0(t3)
- ulw t1, 4(t3)
- ulw t2, 8(t3)
- usw t0, 0(t4)
- ulw t0, 12(t3)
- usw t1, 4(t4)
- usw t2, 8(t4)
- usw t0, 12(t4)
- addiu t3, 16
- bne t3, t8, 6b
- addiu t4, 16  /* (delay slot) */
- beqz t7, 8f
- nop
- 7:  /* ---- byte tail ---- */
- lbu t0, 0(t3)
- sb t0, 0(t4)
- addiu t3, 1
- bne t3, t5, 7b
- addiu t4, 1  /* (delay slot) */
- 8:
- addiu t9, -1
- bgtz t9, 5b
- addiu s0, 8  /* (delay slot) NOTE(review): s0 itself is advanced here and is also the base used at 0: for the next input row; confirm this interaction is intended */
- 9:
- addu s3, s3, a1
- bne s3, s2, 0b
- addiu t6, 1  /* (delay slot) */
- 10:
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
- j ra
- nop
- END(jsimd_int_upsample_dspr2)
- LEAF_DSPR2(jsimd_h2v1_upsample_dspr2)  /* Doubles each row horizontally by writing every input byte twice. a0 = row count, a1 = output row width in bytes, a2 = input row-pointer array, a3 = address holding the output row-pointer array. Uses only temporaries, so no stack frame is needed. */
- lw t7, 0(a3)  /* t7 = cursor in the output row-pointer array */
- andi t8, a1, 0xf  /* t8 = output bytes left after the 16-byte chunks */
- sll t0, a0, 2
- blez a0, 4f
- addu t9, t7, t0  /* (delay slot) t9 = end of the output row-pointer array */
- 0:  /* ---- per row ---- */
- lw t5, 0(t7)  /* t5 = output cursor */
- lw t6, 0(a2)  /* t6 = input cursor */
- addu t3, t5, a1
- subu t3, t8  /* t3 = end of the 16-byte chunk region */
- beq t5, t3, 2f  /* row shorter than 16 bytes: tail loop only */
- move t4, t8  /* (delay slot) t4 = tail byte counter */
- 1:  /* ---- 8 input bytes -> 16 output bytes ---- */
- ulw t0, 0(t6)
- ulw t2, 4(t6)
- srl t1, t0, 16
- ins t0, t0, 16, 16  /* the ins pairs below turn two source bytes into a word with each byte doubled */
- ins t1, t1, 16, 16
- ins t0, t0, 8, 16
- ins t1, t1, 8, 16
- usw t0, 0(t5)
- usw t1, 4(t5)
- srl t0, t2, 16
- ins t2, t2, 16, 16
- ins t0, t0, 16, 16
- ins t2, t2, 8, 16
- ins t0, t0, 8, 16
- usw t2, 8(t5)
- usw t0, 12(t5)
- addiu t5, 16
- bne t5, t3, 1b
- addiu t6, 8  /* (delay slot) */
- beqz t8, 3f
- move t4, t8  /* (delay slot) */
- 2:  /* ---- tail: one input byte -> two output bytes ---- */
- lbu t1, 0(t6)
- sb t1, 0(t5)
- sb t1, 1(t5)
- addiu t4, -2
- addiu t6, 1
- bgtz t4, 2b
- addiu t5, 2  /* (delay slot) */
- 3:
- addiu t7, 4
- bne t9, t7, 0b
- addiu a2, 4  /* (delay slot) next input row pointer */
- 4:
- j ra
- nop
- END(jsimd_h2v1_upsample_dspr2)
- LEAF_DSPR2(jsimd_h2v2_upsample_dspr2)  /* Doubles each row horizontally (every input byte written twice) and then copies the finished output row into the following output row. a0 = output row count (consumed two at a time), a1 = output row width in bytes, a2 = input row-pointer array, a3 = address holding the output row-pointer array. */
- lw t7, 0(a3)  /* t7 = cursor in the output row-pointer array */
- blez a0, 7f
- andi t9, a1, 0xf  /* (delay slot) t9 = output bytes left after the 16-byte chunks */
- 0:  /* ---- per input row ---- */
- lw t6, 0(a2)  /* t6 = input cursor */
- lw t5, 0(t7)  /* t5 = output cursor (first row of the pair) */
- addu t8, t5, a1
- subu t8, t9  /* t8 = end of the 16-byte chunk region */
- beq t5, t8, 2f  /* row shorter than 16 bytes: tail loop only */
- move t4, t9  /* (delay slot) t4 = tail byte counter */
- 1:  /* ---- 8 input bytes -> 16 output bytes ---- */
- ulw t0, 0(t6)
- srl t1, t0, 16
- ins t0, t0, 16, 16  /* the ins pairs turn two source bytes into a word with each byte doubled */
- ins t0, t0, 8, 16
- ins t1, t1, 16, 16
- ins t1, t1, 8, 16
- ulw t2, 4(t6)
- usw t0, 0(t5)
- usw t1, 4(t5)
- srl t3, t2, 16
- ins t2, t2, 16, 16
- ins t2, t2, 8, 16
- ins t3, t3, 16, 16
- ins t3, t3, 8, 16
- usw t2, 8(t5)
- usw t3, 12(t5)
- addiu t5, 16
- bne t5, t8, 1b
- addiu t6, 8  /* (delay slot) */
- beqz t9, 3f
- move t4, t9  /* (delay slot) */
- 2:  /* ---- tail: one input byte -> two output bytes ---- */
- lbu t0, 0(t6)
- sb t0, 0(t5)
- sb t0, 1(t5)
- addiu t4, -2
- addiu t6, 1
- bgtz t4, 2b
- addiu t5, 2  /* (delay slot) */
- 3:  /* ---- duplicate the finished row into the second row of the pair ---- */
- lw t6, 0(t7)  /* source: the row just written */
- lw t5, 4(t7)  /* destination: the next output row */
- addu t4, t6, a1  /* t4 = end of the source row */
- beq a1, t9, 5f  /* width < 16: byte copy only */
- subu t8, t4, t9  /* (delay slot) end of the 16-byte chunk region */
- 4:  /* ---- 16 bytes per iteration ---- */
- ulw t0, 0(t6)
- ulw t1, 4(t6)
- ulw t2, 8(t6)
- usw t0, 0(t5)
- ulw t0, 12(t6)
- usw t1, 4(t5)
- usw t2, 8(t5)
- usw t0, 12(t5)
- addiu t6, 16
- bne t6, t8, 4b
- addiu t5, 16  /* (delay slot) */
- beqz t9, 6f
- nop
- 5:  /* ---- byte tail ---- */
- lbu t0, 0(t6)
- sb t0, 0(t5)
- addiu t6, 1
- bne t6, t4, 5b
- addiu t5, 1  /* (delay slot) */
- 6:
- addiu t7, 8  /* two output rows consumed */
- addiu a0, -2
- bgtz a0, 0b
- addiu a2, 4  /* (delay slot) next input row pointer */
- 7:
- j ra
- nop
- END(jsimd_h2v2_upsample_dspr2)
- LEAF_DSPR2(jsimd_idct_islow_dspr2)  /* Two-pass 8x8 inverse DCT in integer arithmetic. Pass 1 walks the 8 columns of the 16-bit block at a0, multiplies each entry by the matching 16-bit entry of the table at a1, and writes 32-bit intermediates to a 256-byte stack workspace. Pass 2 walks the 8 workspace rows and stores 8 bytes per row through the row pointers at a2, mapping results through the byte table at a3. NOTE(review): the multiplier constants look like 13-bit fixed-point DCT coefficients (e.g. 9633 / 8192 = 1.1759); confirm against the reference C implementation. */
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- addiu sp, sp, -256  /* 8 x 8 x 4-byte workspace */
- move v0, sp  /* v0 = workspace cursor */
- addiu v1, zero, 8  /* column counter */
- 1:  /* ---- pass 1: one column per iteration ---- */
- lh s4, 32(a0)
- lh s5, 64(a0)
- lh s6, 96(a0)
- lh t1, 112(a0)
- lh t7, 16(a0)
- lh t5, 80(a0)
- lh t3, 48(a0)
- or s4, s4, t1  /* OR together rows 1..7 of this column */
- or s4, s4, t3
- or s4, s4, t5
- or s4, s4, t7
- or s4, s4, s5
- or s4, s4, s6
- bnez s4, 2f  /* any non-zero entry below row 0: take the full path */
- addiu v1, v1, -1  /* (delay slot) */
- lh s5, 0(a1)  /* shortcut: only row 0 is non-zero */
- lh s6, 0(a0)
- mul s5, s5, s6
- sll s5, s5, 2
- sw s5, 0(v0)  /* same value in all eight rows of this workspace column */
- sw s5, 32(v0)
- sw s5, 64(v0)
- sw s5, 96(v0)
- sw s5, 128(v0)
- sw s5, 160(v0)
- sw s5, 192(v0)
- b 3f
- sw s5, 224(v0)  /* (delay slot) */
- 2:  /* ---- full column: odd rows first ---- */
- lh t0, 112(a1)
- lh t2, 48(a1)
- lh t4, 80(a1)
- lh t6, 16(a1)
- mul t0, t0, t1  /* row 7 * table */
- mul t1, t2, t3  /* row 3 * table */
- mul t2, t4, t5  /* row 5 * table */
- mul t3, t6, t7  /* row 1 * table */
- lh t4, 32(a1)
- lh t5, 32(a0)
- lh t6, 96(a1)
- lh t7, 96(a0)
- addu s0, t0, t1
- addu s1, t1, t2
- addu s2, t2, t3
- addu s3, s0, s2
- addiu t9, zero, 9633
- mul s3, s3, t9
- addu t8, t0, t3
- addiu t9, zero, 2446
- mul t0, t0, t9
- addiu t9, zero, 16819
- mul t2, t2, t9
- addiu t9, zero, 25172
- mul t1, t1, t9
- addiu t9, zero, 12299
- mul t3, t3, t9
- addiu t9, zero, 16069
- mul s0, s0, t9
- addiu t9, zero, 3196
- mul s2, s2, t9
- addiu t9, zero, 7373
- mul t8, t8, t9
- addiu t9, zero, 20995
- mul s1, s1, t9
- subu s0, s3, s0
- addu t0, t0, s0
- addu t1, t1, s0
- subu s2, s3, s2
- addu t2, t2, s2
- addu t3, t3, s2
- subu t0, t0, t8  /* t0..t3 now hold the four odd-part terms */
- subu t1, t1, s1
- subu t2, t2, s1
- subu t3, t3, t8
- mul s0, t4, t5  /* ---- even rows: row 2 * table ---- */
- addiu t9, zero, 6270
- mul s1, t6, t7  /* row 6 * table */
- lh t4, 0(a1)
- lh t5, 0(a0)
- lh t6, 64(a1)
- lh t7, 64(a0)
- mul s2, t9, s0
- mul t5, t4, t5  /* row 0 * table */
- mul t6, t6, t7  /* row 4 * table */
- addiu t9, zero, 4433
- addu s3, s0, s1
- mul s3, s3, t9
- addiu t9, zero, 15137
- mul t8, s1, t9
- addu t4, t5, t6
- subu t5, t5, t6
- sll t4, t4, 13
- sll t5, t5, 13
- addu t7, s3, s2
- subu t6, s3, t8
- addu s0, t4, t7
- subu s1, t4, t7
- addu s2, t5, t6
- subu s3, t5, t6
- addu t4, s0, t3  /* butterflies: even part +/- odd part */
- subu s0, s0, t3
- addu t3, s2, t1
- subu s2, s2, t1
- addu t1, s3, t2
- subu s3, s3, t2
- addu t2, s1, t0
- subu s1, s1, t0
- shra_r.w t4, t4, 11  /* rounded descale by 11 bits */
- shra_r.w t3, t3, 11
- shra_r.w t1, t1, 11
- shra_r.w t2, t2, 11
- shra_r.w s1, s1, 11
- shra_r.w s3, s3, 11
- shra_r.w s2, s2, 11
- shra_r.w s0, s0, 11
- sw t4, 0(v0)
- sw t3, 32(v0)
- sw t1, 64(v0)
- sw t2, 96(v0)
- sw s1, 128(v0)
- sw s3, 160(v0)
- sw s2, 192(v0)
- sw s0, 224(v0)
- 3:
- addiu a1, a1, 2  /* next column of the table */
- addiu a0, a0, 2  /* next column of the block */
- bgtz v1, 1b
- addiu v0, v0, 4  /* (delay slot) next workspace column */
- move v0, sp  /* ---- pass 2 setup ---- */
- addiu v1, zero, 8  /* row counter */
- 4:  /* ---- pass 2: one workspace row per iteration ---- */
- lw t0, 8(v0)
- lw t1, 24(v0)
- lw t2, 0(v0)
- lw t3, 16(v0)
- lw s4, 4(v0)
- lw s5, 12(v0)
- lw s6, 20(v0)
- lw s7, 28(v0)
- or s4, s4, t0  /* OR together elements 1..7 of this row */
- or s4, s4, t1
- or s4, s4, t3
- or s4, s4, s7
- or s4, s4, s5
- or s4, s4, s6
- bnez s4, 5f
- addiu v1, v1, -1  /* (delay slot) */
- shra_r.w s5, t2, 5  /* shortcut: only element 0 is non-zero */
- andi s5, s5, 0x3ff  /* 10-bit index into the byte table */
- lbux s5, s5(a3)
- lw s1, 0(a2)  /* s1 = output row */
- replv.qb s5, s5  /* replicate the byte into all four lanes */
- usw s5, 0(s1)
- usw s5, 4(s1)
- b 6f
- nop
- 5:  /* ---- full row: even part ---- */
- addu t4, t0, t1
- addiu t8, zero, 4433
- mul t5, t4, t8
- addiu t8, zero, 15137
- mul t1, t1, t8
- addiu t8, zero, 6270
- mul t0, t0, t8
- addu t4, t2, t3
- subu t2, t2, t3
- sll t4, t4, 13
- sll t2, t2, 13
- subu t1, t5, t1
- subu t3, t2, t1
- addu t2, t2, t1
- addu t5, t5, t0
- subu t1, t4, t5
- addu t0, t4, t5
- lw t4, 28(v0)  /* ---- odd part: elements 7, 3, 5, 1 ---- */
- lw t6, 12(v0)
- lw t5, 20(v0)
- lw t7, 4(v0)
- addu s0, t4, t6
- addiu t8, zero, 9633
- addu s1, t5, t7
- addu s2, s0, s1
- mul s2, s2, t8
- addu s3, t4, t7
- addu t9, t5, t6
- addiu t8, zero, 16069
- mul s0, s0, t8
- addiu t8, zero, 3196
- mul s1, s1, t8
- addiu t8, zero, 2446
- mul t4, t4, t8
- addiu t8, zero, 7373
- mul s3, s3, t8
- addiu t8, zero, 16819
- mul t5, t5, t8
- addiu t8, zero, 20995
- mul t9, t9, t8
- addiu t8, zero, 25172
- mul t6, t6, t8
- addiu t8, zero, 12299
- mul t7, t7, t8
- subu s0, s2, s0
- subu s1, s2, s1
- addu t4, t4, s0
- subu t4, t4, s3
- addu t5, t5, s1
- subu t5, t5, t9
- addu t6, t6, s0
- subu t6, t6, t9
- addu t7, t7, s1
- subu t7, t7, s3
- addu s0, t0, t7  /* butterflies: even part +/- odd part */
- subu t0, t0, t7
- addu t7, t2, t6
- subu t2, t2, t6
- addu t6, t3, t5
- subu t3, t3, t5
- addu t5, t1, t4
- subu t1, t1, t4
- shra_r.w s0, s0, 18  /* rounded descale by 18 bits */
- shra_r.w t7, t7, 18
- shra_r.w t6, t6, 18
- shra_r.w t5, t5, 18
- shra_r.w t1, t1, 18
- shra_r.w t3, t3, 18
- shra_r.w t2, t2, 18
- shra_r.w t0, t0, 18
- andi s0, s0, 0x3ff  /* 10-bit indices into the byte table */
- andi t7, t7, 0x3ff
- andi t6, t6, 0x3ff
- andi t5, t5, 0x3ff
- andi t1, t1, 0x3ff
- andi t3, t3, 0x3ff
- andi t2, t2, 0x3ff
- andi t0, t0, 0x3ff
- lw s1, 0(a2)  /* s1 = output row */
- lbux s0, s0(a3)
- lbux t7, t7(a3)
- lbux t6, t6(a3)
- lbux t5, t5(a3)
- lbux t1, t1(a3)
- lbux t3, t3(a3)
- lbux t2, t2(a3)
- lbux t0, t0(a3)
- sb s0, 0(s1)
- sb t7, 1(s1)
- sb t6, 2(s1)
- sb t5, 3(s1)
- sb t1, 4(s1)
- sb t3, 5(s1)
- sb t2, 6(s1)
- sb t0, 7(s1)
- 6:
- addiu v0, v0, 32  /* next workspace row */
- bgtz v1, 4b
- addiu a2, a2, 4  /* (delay slot) next output row pointer */
- addiu sp, sp, 256  /* release the workspace */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_idct_islow_dspr2)
- LEAF_DSPR2(jsimd_idct_ifast_cols_dspr2)  /* Column pass of an 8x8 inverse DCT on packed 16-bit pairs: each iteration handles two adjacent columns held in one 32-bit word. a0 = 16-bit input block, a1 = 16-bit multiplier table, a2 = output block, a3 = table of four packed constants (read through AT). */
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- addiu t9, a0, 16  /* stop after 16 bytes = 8 columns (4 iterations) */
- or AT, a3, zero  /* AT = constants table */
- 0:  /* ---- two columns per iteration ---- */
- lw s0, 0(a1)
- lw t0, 0(a0)
- lw t1, 16(a0)
- muleq_s.w.phl v0, t0, s0  /* row 0: multiply the left (upper) halfwords */
- lw t2, 32(a0)
- lw t3, 48(a0)
- lw t4, 64(a0)
- lw t5, 80(a0)
- muleq_s.w.phr t0, t0, s0  /* row 0: multiply the right (lower) halfwords */
- lw t6, 96(a0)
- lw t7, 112(a0)
- or s4, t1, t2
- or s5, t3, t4
- bnez s4, 1f  /* rows 1 or 2 non-zero: full path */
- ins t0, v0, 16, 16  /* (delay slot) repack the low 16 bits of both products into one word */
- bnez s5, 1f
- or s6, t5, t6  /* (delay slot) */
- or s6, s6, t7
- bnez s6, 1f
- sw t0, 0(a2)  /* (delay slot) executed on both paths; the full path stores row 0 again later */
- sw t0, 16(a2)  /* shortcut: rows 1..7 are zero, so every output row gets the row-0 product */
- sw t0, 32(a2)
- sw t0, 48(a2)
- sw t0, 64(a2)
- sw t0, 80(a2)
- sw t0, 96(a2)
- sw t0, 112(a2)
- addiu a0, a0, 4
- b 2f
- addiu a1, a1, 4  /* (delay slot) */
- 1:  /* ---- full path: multiply the remaining rows by their table entries ---- */
- lw s1, 32(a1)
- lw s2, 64(a1)
- muleq_s.w.phl v0, t2, s1
- muleq_s.w.phr t2, t2, s1
- lw s0, 16(a1)
- lw s1, 48(a1)
- lw s3, 96(a1)
- muleq_s.w.phl v1, t4, s2
- muleq_s.w.phr t4, t4, s2
- lw s2, 80(a1)
- lw t8, 4(AT)  /* packed constant #1 */
- ins t2, v0, 16, 16
- muleq_s.w.phl v0, t6, s3
- muleq_s.w.phr t6, t6, s3
- ins t4, v1, 16, 16
- addq.ph s4, t0, t4  /* even part: row0 + row4 */
- subq.ph s5, t0, t4  /* row0 - row4 */
- ins t6, v0, 16, 16
- subq.ph s6, t2, t6  /* row2 - row6 */
- addq.ph s7, t2, t6  /* row2 + row6 */
- mulq_s.ph s6, s6, t8
- addq.ph t0, s4, s7
- subq.ph t6, s4, s7
- muleq_s.w.phl v0, t1, s0
- muleq_s.w.phr t1, t1, s0
- shll_s.ph s6, s6, 1
- lw s3, 112(a1)
- subq.ph s6, s6, s7
- muleq_s.w.phl v1, t7, s3
- muleq_s.w.phr t7, t7, s3
- ins t1, v0, 16, 16
- addq.ph t2, s5, s6
- subq.ph t4, s5, s6
- muleq_s.w.phl v0, t5, s2
- muleq_s.w.phr t5, t5, s2
- ins t7, v1, 16, 16
- addq.ph s5, t1, t7  /* odd part: row1 + row7 */
- subq.ph s6, t1, t7  /* row1 - row7 */
- muleq_s.w.phl v1, t3, s1
- muleq_s.w.phr t3, t3, s1
- ins t5, v0, 16, 16
- ins t3, v1, 16, 16
- addq.ph s7, t5, t3  /* row5 + row3 */
- subq.ph v0, t5, t3  /* row5 - row3 */
- addq.ph t7, s5, s7
- subq.ph s5, s5, s7
- addq.ph v1, v0, s6
- mulq_s.ph s5, s5, t8
- lw t8, 8(AT)  /* packed constant #2 */
- lw s4, 0(AT)  /* packed constant #0 */
- addq.ph s0, t0, t7
- subq.ph s1, t0, t7
- mulq_s.ph v1, v1, t8
- shll_s.ph s5, s5, 1
- lw t8, 12(AT)  /* packed constant #3 */
- sw s0, 0(a2)  /* output row 0 */
- shll_s.ph v0, v0, 1
- mulq_s.ph v0, v0, t8
- mulq_s.ph s4, s6, s4
- shll_s.ph v1, v1, 1
- addiu a0, a0, 4
- addiu a1, a1, 4
- sw s1, 112(a2)  /* output row 7 */
- shll_s.ph s6, v0, 1
- shll_s.ph s4, s4, 1
- addq.ph s6, s6, v1
- subq.ph t5, s6, t7
- subq.ph s4, s4, v1
- subq.ph t3, s5, t5
- addq.ph s2, t2, t5
- addq.ph t1, s4, t3
- subq.ph s3, t2, t5
- sw s2, 16(a2)  /* output rows 1 and 6 */
- sw s3, 96(a2)
- addq.ph v0, t4, t3
- subq.ph v1, t4, t3
- sw v0, 32(a2)  /* output rows 2 and 5 */
- sw v1, 80(a2)
- addq.ph v0, t6, t1
- subq.ph v1, t6, t1
- sw v0, 64(a2)  /* output rows 4 and 3 */
- sw v1, 48(a2)
- 2:
- bne a0, t9, 0b
- addiu a2, a2, 4  /* (delay slot) next pair of output columns */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_idct_ifast_cols_dspr2)
- LEAF_DSPR2(jsimd_idct_ifast_rows_dspr2)  /* Row pass of an 8x8 inverse DCT on packed 16-bit pairs: each iteration converts two rows of the 16-bit block at a0 into two rows of 8 output bytes. a1 = output row-pointer array, a2 = byte offset added to every output row pointer, a3 = table of four packed constants. */
- SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3
- addiu t9, a0, 128  /* stop after 128 bytes = 8 rows (4 iterations) */
- lui s8, 0x8080
- ori s8, s8, 0x8080  /* s8 = 0x80808080, added to every output byte */
- 0:  /* ---- two rows per iteration ---- */
- lw AT, 36(sp)  /* reload the constants-table pointer: a3 is reused as scratch below (presumably 36(sp) is where the macro saved a3; confirm against SAVE_REGS_ON_STACK) */
- lw t0, 0(a0)
- lw s0, 16(a0)
- lw t2, 4(a0)
- lw s2, 20(a0)
- lw t4, 8(a0)
- lw s4, 24(a0)
- lw t6, 12(a0)
- lw s6, 28(a0)
- precrq.ph.w t1, s0, t0  /* t1 = upper halfwords of both rows (element 1 of each) */
- ins t0, s0, 16, 16  /* t0 = lower halfwords of both rows (element 0 of each) */
- bnez t1, 1f  /* any of elements 1..7 non-zero in either row: full path */
- or s0, t2, s2  /* (delay slot) */
- bnez s0, 1f
- or s0, t4, s4  /* (delay slot) */
- bnez s0, 1f
- or s0, t6, s6  /* (delay slot) */
- bnez s0, 1f
- shll_s.ph s0, t0, 2  /* (delay slot) shortcut value: element 0 << 2 with saturation */
- lw a3, 0(a1)  /* shortcut: both rows are constant */
- lw AT, 4(a1)
- precrq.ph.w t0, s0, s0
- ins s0, s0, 16, 16
- addu a3, a3, a2  /* first output row address */
- addu AT, AT, a2  /* second output row address */
- precrq.qb.ph t0, t0, t0  /* keep the upper byte of each halfword, replicated into four bytes */
- precrq.qb.ph s0, s0, s0
- addu.qb s0, s0, s8
- addu.qb t0, t0, s8
- sw s0, 0(a3)
- sw s0, 4(a3)
- sw t0, 0(AT)
- sw t0, 4(AT)
- addiu a0, a0, 32
- bne a0, t9, 0b
- addiu a1, a1, 8  /* (delay slot) */
- b 2f
- nop
- 1:  /* ---- full path: regroup so each register holds the same element of both rows ---- */
- precrq.ph.w t3, s2, t2
- ins t2, s2, 16, 16
- precrq.ph.w t5, s4, t4
- ins t4, s4, 16, 16
- precrq.ph.w t7, s6, t6
- ins t6, s6, 16, 16
- lw t8, 4(AT)  /* packed constant #1 */
- addq.ph s4, t0, t4  /* even part: e0 + e4 */
- subq.ph s5, t0, t4  /* e0 - e4 */
- subq.ph s6, t2, t6  /* e2 - e6 */
- addq.ph s7, t2, t6  /* e2 + e6 */
- mulq_s.ph s6, s6, t8
- addq.ph t0, s4, s7
- subq.ph t6, s4, s7
- shll_s.ph s6, s6, 1
- subq.ph s6, s6, s7
- addq.ph t2, s5, s6
- subq.ph t4, s5, s6
- addq.ph s5, t1, t7  /* odd part: e1 + e7 */
- subq.ph s6, t1, t7  /* e1 - e7 */
- addq.ph s7, t5, t3  /* e5 + e3 */
- subq.ph v0, t5, t3  /* e5 - e3 */
- addq.ph t7, s5, s7
- subq.ph s5, s5, s7
- addq.ph v1, v0, s6
- mulq_s.ph s5, s5, t8
- lw t8, 8(AT)  /* packed constant #2 */
- lw s4, 0(AT)  /* packed constant #0 */
- addq.ph s0, t0, t7
- subq.ph s7, t0, t7
- mulq_s.ph v1, v1, t8
- lw a3, 0(a1)
- lw t8, 12(AT)  /* packed constant #3 */
- shll_s.ph s5, s5, 1
- addu a3, a3, a2  /* first output row address */
- shll_s.ph v0, v0, 1
- mulq_s.ph v0, v0, t8
- mulq_s.ph s4, s6, s4
- shll_s.ph v1, v1, 1
- addiu a0, a0, 32
- addiu a1, a1, 8
- shll_s.ph s6, v0, 1
- shll_s.ph s4, s4, 1
- addq.ph s6, s6, v1
- shll_s.ph s0, s0, 2
- subq.ph t5, s6, t7
- subq.ph s4, s4, v1
- subq.ph t3, s5, t5
- shll_s.ph s7, s7, 2
- addq.ph t1, s4, t3
- addq.ph s1, t2, t5
- subq.ph s6, t2, t5
- addq.ph s2, t4, t3
- subq.ph s5, t4, t3
- addq.ph s4, t6, t1
- subq.ph s3, t6, t1
- shll_s.ph s1, s1, 2  /* saturating << 2 so the wanted bits land in the upper byte of each halfword */
- shll_s.ph s2, s2, 2
- shll_s.ph s3, s3, 2
- shll_s.ph s4, s4, 2
- shll_s.ph s5, s5, 2
- shll_s.ph s6, s6, 2
- precrq.ph.w t0, s1, s0  /* split back into per-row registers */
- ins s0, s1, 16, 16
- precrq.ph.w t2, s3, s2
- ins s2, s3, 16, 16
- precrq.ph.w t4, s5, s4
- ins s4, s5, 16, 16
- precrq.ph.w t6, s7, s6
- ins s6, s7, 16, 16
- precrq.qb.ph t0, t2, t0  /* keep the upper byte of each halfword: four output bytes per register */
- precrq.qb.ph s0, s2, s0
- precrq.qb.ph t4, t6, t4
- precrq.qb.ph s4, s6, s4
- addu.qb s0, s0, s8
- addu.qb s4, s4, s8
- sw s0, 0(a3)
- sw s4, 4(a3)
- lw a3, -4(a1)  /* second row pointer (a1 was already advanced by 8) */
- addu.qb t0, t0, s8
- addu a3, a3, a2
- addu.qb t4, t4, s8
- sw t0, 0(a3)
- bne a0, t9, 0b
- sw t4, 4(a3)  /* (delay slot) */
- 2:
- RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3
- j ra
- nop
- END(jsimd_idct_ifast_rows_dspr2)
- LEAF_DSPR2(jsimd_fdct_islow_dspr2)  /* In-place two-pass forward DCT on the 8x8 block of 16-bit values at a0: pass 1 transforms the eight 16-byte rows with packed dot products, pass 2 transforms the eight columns with scalar multiply-accumulate. a1, a2 and a3 are used as scratch. */
- SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8
- lui t0, 6437  /* t0..t9 = pairs of 16-bit multipliers packed into one word each, consumed by dpa.w.ph in pass 1 */
- ori t0, 2260
- lui t1, 9633
- ori t1, 11363
- lui t2, 0xd39e
- ori t2, 0xe6dc
- lui t3, 0xf72d
- ori t3, 9633
- lui t4, 2261
- ori t4, 9633
- lui t5, 0xd39e
- ori t5, 6437
- lui t6, 9633
- ori t6, 0xd39d
- lui t7, 0xe6dc
- ori t7, 2260
- lui t8, 4433
- ori t8, 10703
- lui t9, 0xd630
- ori t9, 4433
- li s8, 8  /* row counter */
- move a1, a0  /* a1 = row cursor */
- 1:  /* ---- pass 1: one row per iteration ---- */
- lw s0, 0(a1)
- lw s1, 4(a1)
- lw s2, 8(a1)
- lw s3, 12(a1)
- packrl.ph s1, s1, s1  /* swap the two halfwords so mirrored elements line up */
- packrl.ph s3, s3, s3
- subq.ph s7, s1, s2  /* packed differences of mirrored elements */
- subq.ph s5, s0, s3
- mult $0, $0  /* clear accumulator ac0 */
- dpa.w.ph $ac0, s7, t0
- dpa.w.ph $ac0, s5, t1
- mult $ac1, $0, $0  /* clear ac1 */
- dpa.w.ph $ac1, s7, t2
- dpa.w.ph $ac1, s5, t3
- mult $ac2, $0, $0  /* clear ac2 */
- dpa.w.ph $ac2, s7, t4
- dpa.w.ph $ac2, s5, t5
- mult $ac3, $0, $0  /* clear ac3 */
- dpa.w.ph $ac3, s7, t6
- dpa.w.ph $ac3, s5, t7
- addq.ph s6, s1, s2  /* packed sums of mirrored elements */
- addq.ph s4, s0, s3
- extr_r.w s0, $ac0, 11  /* rounded >> 11 */
- extr_r.w s1, $ac1, 11
- extr_r.w s2, $ac2, 11
- extr_r.w s3, $ac3, 11
- addq.ph s5, s4, s6
- subq.ph s7, s4, s6
- sh s0, 2(a1)  /* odd outputs: elements 1, 3, 5, 7 */
- sh s1, 6(a1)
- sh s2, 10(a1)
- sh s3, 14(a1)
- mult $0, $0
- dpa.w.ph $ac0, s7, t8
- mult $ac1, $0, $0
- dpa.w.ph $ac1, s7, t9
- sra s4, s5, 16  /* upper halfword of the packed sum, sign-extended */
- addiu a1, a1, 16
- addiu s8, s8, -1
- extr_r.w s0, $ac0, 11
- extr_r.w s1, $ac1, 11
- addu s2, s5, s4  /* only the low halfword is stored: low + high */
- subu s3, s5, s4  /* low - high */
- sll s2, s2, 2
- sll s3, s3, 2
- sh s2, -16(a1)  /* element 0 (offsets are relative to the already advanced a1) */
- sh s3, -8(a1)  /* element 4 */
- sh s0, -12(a1)  /* element 2 */
- bgtz s8, 1b
- sh s1, -4(a1)  /* (delay slot) element 6 */
- li t0, 2260  /* ---- pass 2 multipliers, one per register ---- */
- li t1, 11363
- li t2, 9633
- li t3, 6436
- li t4, 6437
- li t5, 2261
- li t6, 11362
- li t7, 2259
- li t8, 4433
- li t9, 10703
- li a1, 10704
- li s8, 8  /* column counter */
- 2:  /* ---- pass 2: one column per iteration ---- */
- lh a2, 0(a0)
- lh a3, 16(a0)
- lh v0, 32(a0)
- lh v1, 48(a0)
- lh s4, 64(a0)
- lh s5, 80(a0)
- lh s6, 96(a0)
- lh s7, 112(a0)
- addu s2, v0, s5  /* sums and differences of mirrored rows */
- subu s5, v0, s5
- addu s3, v1, s4
- subu s4, v1, s4
- addu s0, a2, s7
- subu s7, a2, s7
- addu s1, a3, s6
- subu s6, a3, s6
- addu a2, s0, s3
- subu v1, s0, s3
- addu a3, s1, s2
- subu v0, s1, s2
- mult s7, t1  /* odd part: four 4-term sums of products, one per accumulator (this one uses the default HI/LO pair) */
- madd s4, t0
- madd s5, t4
- madd s6, t2
- mult $ac1, s7, t2
- msub $ac1, s4, t3
- msub $ac1, s5, t6
- msub $ac1, s6, t7
- mult $ac2, s7, t4
- madd $ac2, s4, t2
- madd $ac2, s5, t5
- msub $ac2, s6, t6
- mult $ac3, s7, t0
- msub $ac3, s4, t1
- madd $ac3, s5, t2
- msub $ac3, s6, t3
- extr_r.w s0, $ac0, 15  /* rounded >> 15 */
- extr_r.w s1, $ac1, 15
- extr_r.w s2, $ac2, 15
- extr_r.w s3, $ac3, 15
- addiu s8, s8, -1
- addu s4, a2, a3
- subu s5, a2, a3
- sh s0, 16(a0)  /* rows 1, 3, 5, 7 */
- sh s1, 48(a0)
- sh s2, 80(a0)
- sh s3, 112(a0)
- mult v0, t8  /* even part */
- madd v1, t9
- mult $ac1, v1, t8
- msub $ac1, v0, a1
- addiu a0, a0, 2  /* next column */
- extr_r.w s6, $ac0, 15
- extr_r.w s7, $ac1, 15
- shra_r.w s4, s4, 2  /* rounded >> 2 */
- shra_r.w s5, s5, 2
- sh s4, -2(a0)  /* row 0 (offsets are relative to the already advanced a0) */
- sh s5, 62(a0)  /* row 4 */
- sh s6, 30(a0)  /* row 2 */
- bgtz s8, 2b
- sh s7, 94(a0)  /* (delay slot) row 6 */
- RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8
- jr ra
- nop
- END(jsimd_fdct_islow_dspr2)
- LEAF_DSPR2(jsimd_fdct_ifast_dspr2)  /* In-place two-pass forward DCT on the 8x8 block of 16-bit values at a0, using four multipliers applied with an 8-bit right shift. Pass 1 works on the eight 16-byte rows with packed-halfword arithmetic, pass 2 on the eight columns with scalar arithmetic. a1, a2 and a3 are used as scratch. */
- .set at
- SAVE_REGS_ON_STACK 8, s0, s1
- li a1, 0x014e014e  /* each constant holds the same 16-bit multiplier in both halfwords: 334 */
- li a2, 0x008b008b  /* 139 */
- li a3, 0x00620062  /* 98 */
- li s1, 0x00b500b5  /* 181 */
- move v0, a0  /* v0 = row cursor */
- addiu v1, v0, 128  /* end of the block */
- 0:  /* ---- pass 1: one row per iteration ---- */
- lw t0, 0(v0)
- lw t1, 4(v0)
- lw t2, 8(v0)
- lw t3, 12(v0)
- packrl.ph t1, t1, t1  /* swap the two halfwords so mirrored elements line up */
- packrl.ph t3, t3, t3
- subq.ph t7, t1, t2  /* packed differences of mirrored elements */
- subq.ph t5, t0, t3
- addq.ph t6, t1, t2  /* packed sums of mirrored elements */
- addq.ph t4, t0, t3
- addq.ph t8, t4, t6
- subq.ph t9, t4, t6
- sra t4, t8, 16
- mult $0, $0  /* clear ac0 */
- dpa.w.ph $ac0, t9, s1
- mult $ac1, $0, $0  /* clear ac1 */
- dpa.w.ph $ac1, t7, a3
- dpsx.w.ph $ac1, t5, a3
- mult $ac2, $0, $0  /* clear ac2 */
- dpa.w.ph $ac2, t7, a2
- mult $ac3, $0, $0  /* clear ac3 */
- dpa.w.ph $ac3, t5, a1
- precrq.ph.w t0, t5, t7
- addq.ph t2, t8, t4
- subq.ph t3, t8, t4
- extr.w t4, $ac0, 8  /* >> 8 without rounding */
- mult $0, $0
- dpa.w.ph $ac0, t0, s1
- extr.w t0, $ac1, 8
- extr.w t1, $ac2, 8
- extr.w t7, $ac3, 8
- extr.w t8, $ac0, 8
- add t6, t1, t0
- add t7, t7, t0
- subq.ph t0, t5, t8
- addq.ph t8, t5, t8
- addq.ph t1, t0, t6
- subq.ph t6, t0, t6
- addq.ph t0, t8, t7
- subq.ph t7, t8, t7
- addq.ph t5, t4, t9
- subq.ph t4, t9, t4
- sh t2, 0(v0)  /* element 0 */
- sh t5, 4(v0)  /* element 2 */
- sh t3, 8(v0)  /* element 4 */
- sh t4, 12(v0)  /* element 6 */
- sh t1, 10(v0)  /* element 5 */
- sh t6, 6(v0)  /* element 3 */
- sh t0, 2(v0)  /* element 1 */
- sh t7, 14(v0)  /* element 7 */
- addiu v0, 16
- bne v1, v0, 0b
- nop
- move v0, a0  /* ---- pass 2 setup ---- */
- addiu v1, v0, 16  /* stop after 8 columns of 2 bytes */
- 1:  /* ---- pass 2: one column per iteration ---- */
- lh t0, 0(v0)
- lh t1, 16(v0)
- lh t2, 32(v0)
- lh t3, 48(v0)
- lh t4, 64(v0)
- lh t5, 80(v0)
- lh t6, 96(v0)
- lh t7, 112(v0)
- add t8, t0, t7  /* sums and differences of mirrored rows */
- sub t7, t0, t7
- add t0, t1, t6
- sub t1, t1, t6
- add t6, t2, t5
- sub t5, t2, t5
- add t2, t3, t4
- sub t3, t3, t4
- add t4, t8, t2
- sub t8, t8, t2
- sub s0, t0, t6
- ins t8, s0, 16, 16  /* pack two terms so one dpa.w.ph multiplies both and adds them */
- add t2, t0, t6
- mult $0, $0
- dpa.w.ph $ac0, t8, s1
- add s0, t4, t2
- sub t4, t4, t2
- sh s0, 0(v0)  /* row 0 */
- sh t4, 64(v0)  /* row 4 */
- extr.w t2, $ac0, 8
- addq.ph t4, t8, t2
- subq.ph t8, t8, t2
- sh t4, 32(v0)  /* row 2 */
- sh t8, 96(v0)  /* row 6 */
- add t3, t3, t5  /* odd part */
- add t0, t5, t1
- add t1, t1, t7
- andi t4, a1, 0xffff  /* extract the scalar multiplier from the packed constant */
- mul s0, t1, t4
- sra s0, s0, 8
- ins t1, t3, 16, 16
- mult $0, $0
- mulsa.w.ph $ac0, t1, a3
- extr.w t8, $ac0, 8
- add t2, t7, t8
- sub t7, t7, t8
- andi t4, a2, 0xffff
- mul t8, t3, t4
- sra t8, t8, 8
- andi t4, s1, 0xffff
- mul t6, t0, t4
- sra t6, t6, 8
- add t0, t6, t8
- sub t1, t6, t8
- add t3, t6, s0
- sub t4, t6, s0
- sub t5, t2, t1
- sub t6, t7, t0
- add t3, t2, t3
- add t4, t7, t4
- sh t5, 80(v0)  /* row 5 */
- sh t6, 48(v0)  /* row 3 */
- sh t3, 16(v0)  /* row 1 */
- sh t4, 112(v0)  /* row 7 */
- addiu v0, 2
- bne v0, v1, 1b
- nop
- RESTORE_REGS_FROM_STACK 8, s0, s1
- j ra
- nop
- END(jsimd_fdct_ifast_dspr2)
- LEAF_DSPR2(jsimd_quantize_dspr2)  /* Converts 64 16-bit values read from a2 into 64 16-bit results written to a0, two per iteration. For each value x: result = sign(x) * ((((|x| + c) & 0xffff) * r) >> (s + 16)), where r, c and s are the 16-bit entries at byte offsets 0, 128 and 384 from the matching position in the table at a1. */
- .set at
- SAVE_REGS_ON_STACK 16, s0, s1, s2
- addiu v0, a2, 124  /* loop ends when a2 reaches the last pair, which is handled after the loop */
- lh t0, 0(a2)  /* ---- preload the first pair ---- */
- lh t1, 0(a1)  /* r for the even element */
- lh t2, 128(a1)  /* c for the even element */
- sra t3, t0, 15  /* 0 or -1 */
- sll t3, t3, 1  /* 0 or -2 */
- addiu t3, t3, 1  /* t3 = +1 or -1 (sign of x) */
- mul t0, t0, t3  /* t0 = |x| */
- lh t4, 384(a1)  /* s for the even element */
- lh t5, 130(a1)  /* c for the odd element */
- lh t6, 2(a2)  /* odd element */
- lh t7, 2(a1)  /* r for the odd element */
- lh t8, 386(a1)  /* s for the odd element */
- 1:  /* ---- main loop: finish the current pair, preload the next ---- */
- andi t1, 0xffff  /* treat r as unsigned 16-bit */
- add t9, t0, t2
- andi t9, 0xffff
- mul v1, t9, t1
- sra s0, t6, 15
- sll s0, s0, 1
- addiu s0, s0, 1  /* s0 = sign of the odd element */
- addiu t9, t4, 16
- srav v1, v1, t9  /* >> (s + 16) */
- mul v1, v1, t3  /* restore the sign */
- mul t6, t6, s0  /* |odd element| */
- andi t7, 0xffff
- addiu a2, a2, 4
- addiu a1, a1, 4
- add s1, t6, t5
- andi s1, 0xffff
- sh v1, 0(a0)  /* even result */
- mul s2, s1, t7
- addiu s1, t8, 16
- srav s2, s2, s1
- mul s2, s2, s0
- lh t0, 0(a2)  /* preload the next pair (a1 and a2 were already advanced) */
- lh t1, 0(a1)
- sra t3, t0, 15
- sll t3, t3, 1
- addiu t3, t3, 1
- mul t0, t0, t3
- lh t2, 128(a1)
- lh t4, 384(a1)
- lh t5, 130(a1)
- lh t8, 386(a1)
- lh t6, 2(a2)
- lh t7, 2(a1)
- sh s2, 2(a0)  /* odd result */
- lh t0, 0(a2)  /* NOTE(review): this load and the next four instructions recompute t0 and t3 exactly as done a few lines above; it looks redundant unless the store to 2(a0) can alias 0(a2). Confirm before removing. */
- sra t3, t0, 15
- sll t3, t3, 1
- addiu t3, t3, 1
- mul t0, t0, t3
- bne a2, v0, 1b
- addiu a0, a0, 4  /* (delay slot) */
- andi t1, 0xffff  /* ---- final pair: same computation without a preload ---- */
- add t9, t0, t2
- andi t9, 0xffff
- mul v1, t9, t1
- sra s0, t6, 15
- sll s0, s0, 1
- addiu s0, s0, 1
- addiu t9, t4, 16
- srav v1, v1, t9
- mul v1, v1, t3
- mul t6, t6, s0
- andi t7, 0xffff
- sh v1, 0(a0)
- add s1, t6, t5
- andi s1, 0xffff
- mul s2, s1, t7
- addiu s1, t8, 16
- addiu a2, a2, 4
- addiu a1, a1, 4
- srav s2, s2, s1
- mul s2, s2, s0
- sh s2, 2(a0)
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2
- j ra
- nop
- END(jsimd_quantize_dspr2)
- #ifndef __mips_soft_float  /* the routine below uses FPU instructions, so it is compiled only for hard-float targets */
- LEAF_DSPR2(jsimd_quantize_float_dspr2)  /* Converts 64 single-precision values to 16-bit integers: out[i] = trunc(a2[i] * a1[i] + 16384.5) - 16384, stored as halfwords at a0. Eight values are handled per iteration, in two software-pipelined groups of four. */
- .set at
- li t1, 0x46800100  /* IEEE-754 single-precision bit pattern of 16384.5 */
- mtc1 t1, f0  /* f0 = 16384.5: keeps the sum positive so truncation rounds to nearest */
- li t0, 63  /* counts down by 8; the loop runs while t0 >= 0, i.e. 8 iterations */
- 0:
- lwc1 f2, 0(a2)
- lwc1 f10, 0(a1)
- lwc1 f4, 4(a2)
- lwc1 f12, 4(a1)
- lwc1 f6, 8(a2)
- lwc1 f14, 8(a1)
- lwc1 f8, 12(a2)
- lwc1 f16, 12(a1)
- madd.s f2, f0, f2, f10  /* f2 = f2 * f10 + f0 */
- madd.s f4, f0, f4, f12
- madd.s f6, f0, f6, f14
- madd.s f8, f0, f8, f16
- lwc1 f10, 16(a1)  /* start loading the second group of four */
- lwc1 f12, 20(a1)
- trunc.w.s f2, f2  /* convert to integer, truncating toward zero */
- trunc.w.s f4, f4
- trunc.w.s f6, f6
- trunc.w.s f8, f8
- lwc1 f14, 24(a1)
- lwc1 f16, 28(a1)
- mfc1 t1, f2
- mfc1 t2, f4
- mfc1 t3, f6
- mfc1 t4, f8
- lwc1 f2, 16(a2)
- lwc1 f4, 20(a2)
- lwc1 f6, 24(a2)
- lwc1 f8, 28(a2)
- madd.s f2, f0, f2, f10
- madd.s f4, f0, f4, f12
- madd.s f6, f0, f6, f14
- madd.s f8, f0, f8, f16
- addiu t1, t1, -16384  /* remove the integer part of the bias */
- addiu t2, t2, -16384
- addiu t3, t3, -16384
- addiu t4, t4, -16384
- trunc.w.s f2, f2
- trunc.w.s f4, f4
- trunc.w.s f6, f6
- trunc.w.s f8, f8
- sh t1, 0(a0)  /* results 0..3 */
- sh t2, 2(a0)
- sh t3, 4(a0)
- sh t4, 6(a0)
- mfc1 t1, f2
- mfc1 t2, f4
- mfc1 t3, f6
- mfc1 t4, f8
- addiu t0, t0, -8
- addiu a2, a2, 32
- addiu a1, a1, 32
- addiu t1, t1, -16384
- addiu t2, t2, -16384
- addiu t3, t3, -16384
- addiu t4, t4, -16384
- sh t1, 8(a0)  /* results 4..7 */
- sh t2, 10(a0)
- sh t3, 12(a0)
- sh t4, 14(a0)
- bgez t0, 0b
- addiu a0, a0, 16  /* (delay slot) */
- j ra
- nop
- END(jsimd_quantize_float_dspr2)
- #endif  /* !__mips_soft_float */
- LEAF_DSPR2(jsimd_idct_2x2_dspr2) /* Writes a 2x2 block of output bytes. a0, a1 = 16-bit arrays read at identical offsets and multiplied element-wise (presumably dequantization table and coefficient block - confirm against the C caller); a2 = array of output row pointers; a3 = byte offset added to each row pointer. */
- .set at
- SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 /* macro defined outside this excerpt; s0-s5 are clobbered below */
- addiu sp, sp, -40 /* reserve 40 bytes (10 words) of stack scratch */
- move v0, sp /* v0 = base of the scratch area */
- addiu s2, zero, 29692 /* s2..s5 = multipliers for the second-pass mult/madd chains */
- addiu s3, zero, -10426
- addiu s4, zero, 6967
- addiu s5, zero, -5906
- lh t0, 0(a1) /* pass 1, first column: element pairs at byte offsets 0, 16, 48, 80, 112 */
- lh t5, 0(a0)
- lh t1, 48(a1)
- lh t6, 48(a0)
- mul t4, t5, t0 /* t4 = product at offset 0 */
- lh t0, 16(a1)
- lh t5, 16(a0)
- mul t6, t6, t1 /* t6 = product at offset 48 */
- mul t5, t5, t0 /* t5 = product at offset 16 */
- lh t2, 80(a1)
- lh t7, 80(a0)
- lh t3, 112(a1)
- lh t8, 112(a0)
- mul t7, t7, t2 /* t7 = product at offset 80 */
- mul t8, t8, t3 /* t8 = product at offset 112 */
- mult zero, zero /* clear $ac0 (HI/LO); issued after the last mul because MUL leaves HI/LO architecturally unpredictable on pre-R6 MIPS32 */
- li s0, 0x73FCD746 /* packed halfwords {29692 : -10426}, same values as s2/s3 */
- li s1, 0x1B37E8EE /* packed halfwords {6967 : -5906}, same values as s4/s5 */
- ins t6, t5, 16, 16 /* t6 = {product@16 : product@48} as two halfwords */
- sll t4, t4, 15 /* scale the offset-0 term by 2^15 */
- dpa.w.ph $ac0, t6, s0 /* ac0 += p16*29692 + p48*(-10426) */
- lh t1, 2(a1) /* start loading the next column (offset +2) while ac0 is busy */
- lh t6, 2(a0)
- ins t8, t7, 16, 16 /* t8 = {product@80 : product@112} */
- dpa.w.ph $ac0, t8, s1 /* ac0 += p80*6967 + p112*(-5906) */
- mflo t0, $ac0 /* t0 = weighted sum of the four non-zero-offset terms */
- mul t5, t6, t1 /* next column: product at offset 2 */
- lh t1, 18(a1)
- lh t6, 18(a0)
- lh t2, 50(a1)
- lh t7, 50(a0)
- mul t6, t6, t1
- subu t8, t4, t0 /* difference output for the first column */
- mul t7, t7, t2
- addu t0, t4, t0 /* sum output for the first column */
- shra_r.w t0, t0, 13 /* rounding arithmetic right shift by 13 */
- lh t1, 82(a1)
- lh t2, 82(a0)
- lh t3, 114(a1)
- lh t4, 114(a0)
- shra_r.w t8, t8, 13
- mul t1, t1, t2
- mul t3, t3, t4
- sw t0, 0(v0) /* scratch word 0 = first column, sum */
- sw t8, 20(v0) /* scratch word 5 = first column, difference */
- sll t4, t5, 15 /* second column (offset +2): same computation */
- ins t7, t6, 16, 16
- mult zero, zero /* clear $ac0 */
- dpa.w.ph $ac0, t7, s0
- ins t3, t1, 16, 16
- lh t1, 6(a1) /* next column is at offset +6; offset +4 is never read */
- lh t6, 6(a0)
- dpa.w.ph $ac0, t3, s1
- mflo t0, $ac0
- mul t5, t6, t1
- lh t1, 22(a1)
- lh t6, 22(a0)
- lh t2, 54(a1)
- lh t7, 54(a0)
- mul t6, t6, t1
- subu t8, t4, t0
- mul t7, t7, t2
- addu t0, t4, t0
- shra_r.w t0, t0, 13
- lh t1, 86(a1)
- lh t2, 86(a0)
- lh t3, 118(a1)
- lh t4, 118(a0)
- shra_r.w t8, t8, 13
- mul t1, t1, t2
- mul t3, t3, t4
- sw t0, 4(v0) /* scratch word 1 / word 6 = column at offset +2 */
- sw t8, 24(v0)
- sll t4, t5, 15 /* column at offset +6 */
- ins t7, t6, 16, 16
- mult zero, zero /* clear $ac0 */
- dpa.w.ph $ac0, t7, s0
- ins t3, t1, 16, 16
- lh t1, 10(a1) /* next column is at offset +10; offset +8 is never read */
- lh t6, 10(a0)
- dpa.w.ph $ac0, t3, s1
- mflo t0, $ac0
- mul t5, t6, t1
- lh t1, 26(a1)
- lh t6, 26(a0)
- lh t2, 58(a1)
- lh t7, 58(a0)
- mul t6, t6, t1
- subu t8, t4, t0
- mul t7, t7, t2
- addu t0, t4, t0
- shra_r.w t0, t0, 13
- lh t1, 90(a1)
- lh t2, 90(a0)
- lh t3, 122(a1)
- lh t4, 122(a0)
- shra_r.w t8, t8, 13
- mul t1, t1, t2
- mul t3, t3, t4
- sw t0, 8(v0) /* scratch word 2 / word 7 = column at offset +6 */
- sw t8, 28(v0)
- sll t4, t5, 15 /* column at offset +10 */
- ins t7, t6, 16, 16
- mult zero, zero /* clear $ac0 */
- dpa.w.ph $ac0, t7, s0
- ins t3, t1, 16, 16
- lh t1, 14(a1) /* last column is at offset +14; offset +12 is never read */
- lh t6, 14(a0)
- dpa.w.ph $ac0, t3, s1
- mflo t0, $ac0
- mul t5, t6, t1
- lh t1, 30(a1)
- lh t6, 30(a0)
- lh t2, 62(a1)
- lh t7, 62(a0)
- mul t6, t6, t1
- subu t8, t4, t0
- mul t7, t7, t2
- addu t0, t4, t0
- shra_r.w t0, t0, 13
- lh t1, 94(a1)
- lh t2, 94(a0)
- lh t3, 126(a1)
- lh t4, 126(a0)
- shra_r.w t8, t8, 13
- mul t1, t1, t2
- mul t3, t3, t4
- sw t0, 12(v0) /* scratch word 3 / word 8 = column at offset +10 */
- sw t8, 32(v0)
- sll t4, t5, 15 /* column at offset +14 */
- ins t7, t6, 16, 16
- mult zero, zero /* clear $ac0 */
- dpa.w.ph $ac0, t7, s0
- ins t3, t1, 16, 16
- dpa.w.ph $ac0, t3, s1
- mflo t0, $ac0
- lw t9, 0(a2) /* t9 = first output row pointer */
- lw t3, 0(v0) /* pass 2 begins: reload scratch words 0..2 */
- lw t7, 4(v0)
- lw t1, 8(v0)
- addu t9, t9, a3 /* apply the byte offset from a3 */
- sll t3, t3, 15 /* scale scratch word 0 by 2^15 */
- subu t8, t4, t0 /* finish the last pass-1 column */
- addu t0, t4, t0
- shra_r.w t0, t0, 13
- shra_r.w t8, t8, 13
- sw t0, 16(v0) /* scratch word 4 / word 9 = column at offset +14 */
- sw t8, 36(v0)
- lw t5, 12(v0)
- lw t6, 16(v0)
- mult t7, s2 /* $ac0 = w1*29692 + w2*(-10426) + w3*6967 + w4*(-5906) over scratch words 1..4 */
- madd t1, s3
- madd t5, s4
- madd t6, s5
- lw t5, 24(v0) /* scratch words 6..9 feed the second output row */
- lw t7, 28(v0)
- mflo t0, $ac0
- lw t8, 32(v0)
- lw t2, 36(v0)
- mult $ac1, t5, s2 /* same weighted sum for the second row, in $ac1 */
- madd $ac1, t7, s3
- madd $ac1, t8, s4
- madd $ac1, t2, s5
- addu t1, t3, t0 /* first row: sum and difference */
- subu t6, t3, t0
- shra_r.w t1, t1, 20 /* rounding right shift by 20 */
- shra_r.w t6, t6, 20
- mflo t4, $ac1
- shll_s.w t1, t1, 24 /* saturating shift left 24 followed by sra 24 clamps to the signed 8-bit range */
- shll_s.w t6, t6, 24
- sra t1, t1, 24
- sra t6, t6, 24
- addiu t1, t1, 128 /* bias by +128 before the byte store */
- addiu t6, t6, 128
- lw t0, 20(v0) /* scratch word 5 = base term of the second row */
- sb t1, 0(t9) /* first row, bytes 0 and 1 */
- sb t6, 1(t9)
- sll t0, t0, 15
- lw t9, 4(a2) /* t9 = second output row pointer */
- addu t1, t0, t4
- subu t6, t0, t4
- addu t9, t9, a3
- shra_r.w t1, t1, 20
- shra_r.w t6, t6, 20
- shll_s.w t1, t1, 24
- shll_s.w t6, t6, 24
- sra t1, t1, 24
- sra t6, t6, 24
- addiu t1, t1, 128
- addiu t6, t6, 128
- sb t1, 0(t9) /* second row, bytes 0 and 1 */
- sb t6, 1(t9)
- addiu sp, sp, 40 /* release the scratch area */
- RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5
- j ra
- nop /* branch delay slot */
- END(jsimd_idct_2x2_dspr2)
- LEAF_DSPR2(jsimd_idct_4x4_dspr2) /* Writes a 4x4 block of output bytes. a0, a1 = 16-bit arrays read at identical offsets and multiplied element-wise (presumably dequantization table and coefficient block - confirm against the C caller); a2 = array of output row pointers; a3 = byte offset added to each row pointer; a word-sized scratch buffer pointer is read from the stack. */
- .set at
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 /* macro defined outside this excerpt */
- lw v1, 48(sp) /* v1 = scratch buffer pointer; presumably the 5th argument at the caller's 16(sp), assuming the macro lowered sp by 32 - confirm against the macro */
- move t0, a1 /* t0 walks the a1 array */
- move t1, v1 /* t1 walks the scratch buffer */
- li t9, 4 /* first loop: 4 columns */
- li s0, 0x2e75f93e /* packed halfwords {11893 : -1730} */
- li s1, 0x21f9ba79 /* packed halfwords {8697 : -17799} */
- li s2, 0xecc2efb0 /* packed halfwords {-4926 : -4176} */
- li s3, 0x52031ccd /* packed halfwords {20995 : 7373} */
- 0: /* pass 1, columns at byte offsets +0, +2, +4, +6 */
- lh s6, 32(t0)
- lh t6, 32(a0)
- lh s7, 96(t0)
- lh t7, 96(a0)
- mul t6, s6, t6 /* t6 = product at offset 32 */
- lh s4, 0(t0)
- mul t7, s7, t7 /* t7 = product at offset 96 */
- lh s5, 0(a0)
- li s6, 15137
- li s7, 6270
- mul t2, s4, s5 /* t2 = product at offset 0 */
- mul t6, s6, t6
- lh t5, 112(t0)
- mul t7, s7, t7
- lh s4, 112(a0)
- lh v0, 80(t0)
- lh s5, 80(a0)
- lh s6, 48(a0)
- sll t2, t2, 14 /* scale the offset-0 term by 2^14 */
- lh s7, 16(a0)
- lh t8, 16(t0)
- subu t6, t6, t7 /* t6 = p32*15137 - p96*6270 */
- lh t7, 48(t0)
- mul t5, s4, t5 /* product at offset 112 */
- mul v0, s5, v0 /* product at offset 80 */
- mul t7, s6, t7 /* product at offset 48 */
- mul t8, s7, t8 /* product at offset 16 */
- addu t3, t2, t6
- subu t4, t2, t6
- mult $ac0, zero, zero /* clear both accumulators */
- mult $ac1, zero, zero
- ins t5, v0, 16, 16 /* t5 = {p80 : p112} */
- ins t7, t8, 16, 16 /* t7 = {p16 : p48} */
- addiu t9, t9, -1
- dpa.w.ph $ac0, t5, s0 /* ac0 = p80*11893 - p112*1730 + p16*8697 - p48*17799 */
- dpa.w.ph $ac0, t7, s1
- dpa.w.ph $ac1, t5, s2 /* ac1 = -p80*4926 - p112*4176 + p16*20995 + p48*7373 */
- dpa.w.ph $ac1, t7, s3
- mflo s4, $ac0
- mflo s5, $ac1
- addiu a0, a0, 2 /* advance to the next column in both inputs */
- addiu t1, t1, 4 /* t1 is bumped before the stores, so store offsets below are relative to the new t1 */
- addiu t0, t0, 2
- addu t6, t4, s4
- subu t5, t4, s4
- addu s6, t3, s5
- subu s7, t3, s5
- shra_r.w t6, t6, 12 /* rounding right shift by 12 */
- shra_r.w t5, t5, 12
- shra_r.w s6, s6, 12
- shra_r.w s7, s7, 12
- sw t6, 28(t1) /* scratch row at +32 bytes */
- sw t5, 60(t1) /* scratch row at +64 bytes */
- sw s6, -4(t1) /* scratch row at +0 bytes */
- bgtz t9, 0b
- sw s7, 92(t1) /* delay slot: scratch row at +96 bytes */
-
- li t9, 3 /* second loop: 3 more columns */
- 1: /* same computation with all offsets +2, so the column at byte offset +8 is skipped and +10, +12, +14 are processed */
- lh s6, 34(t0)
- lh t6, 34(a0)
- lh s7, 98(t0)
- lh t7, 98(a0)
- mul t6, s6, t6
- lh s4, 2(t0)
- mul t7, s7, t7
- lh s5, 2(a0)
- li s6, 15137
- li s7, 6270
- mul t2, s4, s5
- mul v0, s6, t6
- lh t5, 114(t0)
- mul t7, s7, t7
- lh s4, 114(a0)
- lh s5, 82(a0)
- lh t6, 82(t0)
- sll t2, t2, 14
- lh s6, 50(a0)
- lh t8, 18(t0)
- subu v0, v0, t7
- lh t7, 50(t0)
- lh s7, 18(a0)
- mul t5, s4, t5
- mul t6, s5, t6
- mul t7, s6, t7
- mul t8, s7, t8
- addu t3, t2, v0
- subu t4, t2, v0
- mult $ac0, zero, zero
- mult $ac1, zero, zero
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- mflo t5, $ac0
- mflo t6, $ac1
- addiu t9, t9, -1
- addiu t0, t0, 2
- addiu a0, a0, 2
- addiu t1, t1, 4 /* scratch word 4 of each row is skipped as well */
- addu s5, t4, t5
- subu s4, t4, t5
- addu s6, t3, t6
- subu s7, t3, t6
- shra_r.w s5, s5, 12
- shra_r.w s4, s4, 12
- shra_r.w s6, s6, 12
- shra_r.w s7, s7, 12
- sw s5, 32(t1)
- sw s4, 64(t1)
- sw s6, 0(t1)
- bgtz t9, 1b
- sw s7, 96(t1) /* delay slot */
- move t1, v1 /* pass 2: rewind to the start of the scratch buffer; rows are 32 bytes apart */
- li s4, 15137 /* output row 0 (scratch bytes 0..31) */
- lw s6, 8(t1)
- li s5, 6270
- lw s7, 24(t1)
- mul s4, s4, s6
- lw t2, 0(t1)
- mul s5, s5, s7
- lh t5, 28(t1) /* only a halfword of each stored word is read here (which half depends on endianness) */
- lh t6, 20(t1)
- lh t7, 12(t1)
- lh t8, 4(t1)
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- mult $ac0, zero, zero
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- mult $ac1, zero, zero
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- sll t2, t2, 14
- mflo s6, $ac0
-
- subu s4, s4, s5
- addu t3, t2, s4
- mflo s7, $ac1
- subu t4, t2, s4
- addu t7, t4, s6
- subu t8, t4, s6
- addu t5, t3, s7
- subu t6, t3, s7
- shra_r.w t5, t5, 19 /* rounding right shift by 19 */
- shra_r.w t6, t6, 19
- shra_r.w t7, t7, 19
- shra_r.w t8, t8, 19
- sll s4, t9, 2 /* NOTE(review): s4 is overwritten by the next li before being read - appears to be dead code */
- lw v0, 0(a2) /* v0 = output row pointer 0 */
- shll_s.w t5, t5, 24 /* saturating shift left 24 then sra 24 clamps to the signed 8-bit range */
- shll_s.w t6, t6, 24
- shll_s.w t7, t7, 24
- shll_s.w t8, t8, 24
- sra t5, t5, 24
- sra t6, t6, 24
- sra t7, t7, 24
- sra t8, t8, 24
- addu v0, v0, a3 /* apply the byte offset from a3 */
- addiu t5, t5, 128 /* bias by +128 before the byte stores */
- addiu t6, t6, 128
- addiu t7, t7, 128
- addiu t8, t8, 128
- sb t5, 0(v0)
- sb t7, 1(v0)
- sb t8, 2(v0)
- sb t6, 3(v0)
-
- li s4, 15137 /* output row 1 (scratch bytes 32..63) */
- lw s6, 40(t1)
- li s5, 6270
- lw s7, 56(t1)
- mul s4, s4, s6
- lw t2, 32(t1)
- mul s5, s5, s7
- lh t5, 60(t1)
- lh t6, 52(t1)
- lh t7, 44(t1)
- lh t8, 36(t1)
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- mult $ac0, zero, zero
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- mult $ac1, zero, zero
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- sll t2, t2, 14
- mflo s6, $ac0
-
- subu s4, s4, s5
- addu t3, t2, s4
- mflo s7, $ac1
- subu t4, t2, s4
- addu t7, t4, s6
- subu t8, t4, s6
- addu t5, t3, s7
- subu t6, t3, s7
- shra_r.w t5, t5, 19
- shra_r.w t6, t6, 19
- shra_r.w t7, t7, 19
- shra_r.w t8, t8, 19
- sll s4, t9, 2 /* NOTE(review): result unused, as above */
- lw v0, 4(a2) /* v0 = output row pointer 1 */
- shll_s.w t5, t5, 24
- shll_s.w t6, t6, 24
- shll_s.w t7, t7, 24
- shll_s.w t8, t8, 24
- sra t5, t5, 24
- sra t6, t6, 24
- sra t7, t7, 24
- sra t8, t8, 24
- addu v0, v0, a3
- addiu t5, t5, 128
- addiu t6, t6, 128
- addiu t7, t7, 128
- addiu t8, t8, 128
- sb t5, 0(v0)
- sb t7, 1(v0)
- sb t8, 2(v0)
- sb t6, 3(v0)
-
- li s4, 15137 /* output row 2 (scratch bytes 64..95) */
- lw s6, 72(t1)
- li s5, 6270
- lw s7, 88(t1)
- mul s4, s4, s6
- lw t2, 64(t1)
- mul s5, s5, s7
- lh t5, 92(t1)
- lh t6, 84(t1)
- lh t7, 76(t1)
- lh t8, 68(t1)
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- mult $ac0, zero, zero
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- mult $ac1, zero, zero
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- sll t2, t2, 14
- mflo s6, $ac0
-
- subu s4, s4, s5
- addu t3, t2, s4
- mflo s7, $ac1
- subu t4, t2, s4
- addu t7, t4, s6
- subu t8, t4, s6
- addu t5, t3, s7
- subu t6, t3, s7
- shra_r.w t5, t5, 19
- shra_r.w t6, t6, 19
- shra_r.w t7, t7, 19
- shra_r.w t8, t8, 19
- sll s4, t9, 2 /* NOTE(review): result unused, as above */
- lw v0, 8(a2) /* v0 = output row pointer 2 */
- shll_s.w t5, t5, 24
- shll_s.w t6, t6, 24
- shll_s.w t7, t7, 24
- shll_s.w t8, t8, 24
- sra t5, t5, 24
- sra t6, t6, 24
- sra t7, t7, 24
- sra t8, t8, 24
- addu v0, v0, a3
- addiu t5, t5, 128
- addiu t6, t6, 128
- addiu t7, t7, 128
- addiu t8, t8, 128
- sb t5, 0(v0)
- sb t7, 1(v0)
- sb t8, 2(v0)
- sb t6, 3(v0)
- li s4, 15137 /* output row 3 (scratch bytes 96..127) */
- lw s6, 104(t1)
- li s5, 6270
- lw s7, 120(t1)
- mul s4, s4, s6
- lw t2, 96(t1)
- mul s5, s5, s7
- lh t5, 124(t1)
- lh t6, 116(t1)
- lh t7, 108(t1)
- lh t8, 100(t1)
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- mult $ac0, zero, zero
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- mult $ac1, zero, zero
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- sll t2, t2, 14
- mflo s6, $ac0
-
- subu s4, s4, s5
- addu t3, t2, s4
- mflo s7, $ac1
- subu t4, t2, s4
- addu t7, t4, s6
- subu t8, t4, s6
- addu t5, t3, s7
- subu t6, t3, s7
- shra_r.w t5, t5, 19
- shra_r.w t6, t6, 19
- shra_r.w t7, t7, 19
- shra_r.w t8, t8, 19
- sll s4, t9, 2 /* NOTE(review): result unused; s4 is restored from the stack below */
- lw v0, 12(a2) /* v0 = output row pointer 3 */
- shll_s.w t5, t5, 24
- shll_s.w t6, t6, 24
- shll_s.w t7, t7, 24
- shll_s.w t8, t8, 24
- sra t5, t5, 24
- sra t6, t6, 24
- sra t7, t7, 24
- sra t8, t8, 24
- addu v0, v0, a3
- addiu t5, t5, 128
- addiu t6, t6, 128
- addiu t7, t7, 128
- addiu t8, t8, 128
- sb t5, 0(v0)
- sb t7, 1(v0)
- sb t8, 2(v0)
- sb t6, 3(v0)
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop /* branch delay slot */
- END(jsimd_idct_4x4_dspr2)
- LEAF_DSPR2(jsimd_idct_6x6_dspr2) /* Writes a 6x6 block of output bytes. a0, a1 = 16-bit arrays read at identical offsets and multiplied element-wise (presumably dequantization table and coefficient block - confirm against the C caller); a2 = array of output row pointers; a3 = byte offset added to each row pointer. */
- .set at
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 /* macro defined outside this excerpt */
- addiu sp, sp, -144 /* 144 bytes = 36 words of stack scratch (6 rows of 6 words) */
- move v0, sp /* v0 = scratch write pointer */
- addiu v1, v0, 24 /* loop 1 ends after 6 columns (4 bytes each) */
- addiu t9, zero, 5793 /* multipliers shared by both passes */
- addiu s0, zero, 10033
- addiu s1, zero, 2998
- 1: /* pass 1: one input column per iteration */
- lh s2, 0(a0)
- lh s3, 32(a0)
- lh s4, 64(a0)
- lh t2, 64(a1)
- lh t1, 32(a1)
- lh t0, 0(a1)
- mul t2, t2, s4 /* products at offsets 64, 32, 0 */
- mul t1, t1, s3
- mul t0, t0, s2
- lh t6, 16(a1)
- lh t8, 80(a1)
- lh t7, 48(a1)
- lh s2, 16(a0)
- lh s4, 80(a0)
- lh s3, 48(a0)
- mul t2, t2, t9 /* p64 * 5793 */
- mul t1, t1, s0 /* p32 * 10033 */
- sll t0, t0, 13 /* p0 scaled by 2^13 */
- mul t6, t6, s2 /* products at offsets 16, 80, 48 */
- mul t8, t8, s4
- mul t7, t7, s3
- addu t3, t0, t2
- sll t2, t2, 1
- subu t4, t0, t2 /* t4 = p0*2^13 - 2*(p64*5793) */
- subu t5, t3, t1
- addu t3, t3, t1
- addu t1, t6, t8
- mul t1, t1, s1 /* (p16 + p80) * 2998 */
- shra_r.w t4, t4, 11 /* rounding right shift by 11 */
- subu t2, t6, t8
- subu t2, t2, t7
- sll t2, t2, 2 /* (p16 - p80 - p48) * 4 */
- addu t0, t6, t7
- sll t0, t0, 13
- subu s2, t8, t7
- sll s2, s2, 13
- addu t0, t0, t1
- addu t1, s2, t1
- addu s2, t4, t2
- subu s3, t4, t2
- addu t6, t3, t0
- subu t7, t3, t0
- addu t4, t5, t1
- subu t5, t5, t1
- shra_r.w t6, t6, 11
- shra_r.w t7, t7, 11
- shra_r.w t4, t4, 11
- shra_r.w t5, t5, 11
- sw s2, 24(v0) /* scratch rows are 24 bytes apart: row 1 */
- sw s3, 96(v0) /* row 4 */
- sw t6, 0(v0) /* row 0 */
- sw t7, 120(v0) /* row 5 */
- sw t4, 48(v0) /* row 2 */
- sw t5, 72(v0) /* row 3 */
- addiu v0, v0, 4
- addiu a1, a1, 2
- bne v0, v1, 1b
- addiu a0, a0, 2 /* delay slot: next column */
-
- move v0, sp /* pass 2: rewind to scratch row 0 */
- addiu v1, v0, 144 /* loop 2 ends after 6 rows of 24 bytes */
- 2: /* pass 2: one output row per iteration */
- lw t0, 0(v0)
- lw t2, 16(v0)
- lw s5, 0(a2) /* s5 = current output row pointer */
- addiu t0, t0, 16 /* +16 before the shift by 13 adds 2^17, half of the 2^18 net down-scale applied below */
- sll t0, t0, 13
- mul t3, t2, t9 /* word 4 * 5793 */
- lw t6, 4(v0)
- lw t8, 20(v0)
- lw t7, 12(v0)
- addu s5, s5, a3 /* apply the byte offset from a3 */
- addu s6, t6, t8
- mul s6, s6, s1 /* (word 1 + word 5) * 2998 */
- addu t1, t0, t3
- subu t4, t0, t3
- subu t4, t4, t3
- lw t3, 8(v0)
- mul t0, t3, s0 /* word 2 * 10033 */
- addu s7, t6, t7
- sll s7, s7, 13
- addu s7, s6, s7
- subu t2, t8, t7
- sll t2, t2, 13
- addu t2, s6, t2
- subu s6, t6, t7
- subu s6, s6, t8
- sll s6, s6, 13
- addu t3, t1, t0
- subu t5, t1, t0
- addu t6, t3, s7
- subu t3, t3, s7
- addu t7, t4, s6
- subu t4, t4, s6
- addu t8, t5, t2
- subu t5, t5, t2
- shll_s.w t6, t6, 6 /* saturating shift left 6 then sra 24: net shift right by 18, clamped to the signed 8-bit range */
- shll_s.w t3, t3, 6
- shll_s.w t7, t7, 6
- shll_s.w t4, t4, 6
- shll_s.w t8, t8, 6
- shll_s.w t5, t5, 6
- sra t6, t6, 24
- addiu t6, t6, 128 /* bias by +128 before each byte store */
- sra t3, t3, 24
- addiu t3, t3, 128
- sb t6, 0(s5)
- sra t7, t7, 24
- addiu t7, t7, 128
- sb t3, 5(s5)
- sra t4, t4, 24
- addiu t4, t4, 128
- sb t7, 1(s5)
- sra t8, t8, 24
- addiu t8, t8, 128
- sb t4, 4(s5)
- addiu v0, v0, 24 /* next scratch row */
- sra t5, t5, 24
- addiu t5, t5, 128
- sb t8, 2(s5)
- addiu a2, a2, 4 /* next output row pointer */
- bne v0, v1, 2b
- sb t5, 3(s5) /* delay slot */
- addiu sp, sp, 144 /* release the scratch area */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop /* branch delay slot */
- END(jsimd_idct_6x6_dspr2)
- LEAF_DSPR2(jsimd_idct_12x12_pass1_dspr2) /* First pass: for each of 8 columns, multiplies 16-bit elements of a0 and a1 pairwise (presumably dequantization table and coefficient block - confirm against the C caller) and writes 12 32-bit results per column to the buffer at a2. */
- SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 /* macro defined outside this excerpt */
- li a3, 8 /* a3 is used as the column counter; any incoming value is discarded */
- 1:
-
- lh t0, 48(a1) /* terms at byte offsets 48, 16, 80, 112 */
- lh t1, 48(a0)
- lh t2, 16(a1)
- lh t3, 16(a0)
- lh t4, 80(a1)
- lh t5, 80(a0)
- lh t6, 112(a1)
- lh t7, 112(a0)
- mul t0, t0, t1 /* t0 = p48 */
- mul t1, t2, t3 /* t1 = p16 */
- mul t2, t4, t5 /* t2 = p80 */
- mul t3, t6, t7 /* t3 = p112 */
- li t4, 10703
- li t5, 4433
- li t6, 7053
- mul t4, t0, t4 /* p48 * 10703 */
- mul t5, t0, t5 /* p48 * 4433 */
- addu t7, t1, t2
- addu t8, t7, t3
- mul t6, t6, t8 /* (p16 + p80 + p112) * 7053 */
- li t8, 2139
- mul t8, t7, t8 /* (p16 + p80) * 2139 */
- li t7, 2295
- mul t7, t1, t7 /* p16 * 2295 */
- addu t9, t2, t3
- li s0, 8565
- mul t9, t9, s0 /* (p80 + p112) * 8565 */
- li s0, 12112
- mul s0, t2, s0 /* p80 * 12112 */
- li s1, 12998
- mul s1, t3, s1 /* p112 * 12998 */
- li s2, 5540
- mul s2, t1, s2 /* p16 * 5540 */
- li s3, 16244
- mul s3, t3, s3 /* p112 * 16244 */
- subu t1, t1, t3
- subu t0, t0, t2
- addu t2, t0, t1
- li t3, 4433
- mul t2, t2, t3
- li t3, 6270
- mul t1, t1, t3
- li t3, 15137
- mul t0, t0, t3
- addu t8, t6, t8
- addu t3, t8, t4
- addu t3, t3, t7
- subu t8, t8, t9
- addu s0, t5, s0
- subu t8, t8, s0
- subu t9, t6, t9
- subu s1, s1, t4
- addu t9, t9, s1
- subu t6, t6, t5
- subu t6, t6, s2
- subu t6, t6, s3
-
- lh t4, 64(a1) /* terms at byte offsets 64, 32, 0, 96 */
- lh t5, 64(a0)
- lh t7, 32(a1)
- lh s0, 32(a0)
- lh s1, 0(a1)
- lh s2, 0(a0)
- lh s3, 96(a1)
- lh v0, 96(a0)
- mul t4, t4, t5 /* t4 = p64 */
- mul t5, t7, s0 /* t5 = p32 */
- mul t7, s1, s2 /* t7 = p0 */
- mul s0, s3, v0 /* s0 = p96 */
-
- addu t1, t2, t1
- subu t0, t2, t0
-
- addiu a3, a3, -1
- addiu a0, a0, 2 /* next column in both inputs */
- addiu a1, a1, 2
-
- li s1, 10033
- li s2, 11190
- mul t4, t4, s1 /* p64 * 10033 */
- mul s1, t5, s2 /* p32 * 11190 */
- sll t5, t5, 13
- sll t7, t7, 13
- addiu t7, t7, 1024 /* add 2^10, half of the 2^11 divisor applied by the sra-by-11 below */
- sll s0, s0, 13
- addu s2, t7, t4
- subu t4, t7, t4
- subu s3, t5, s0
- addu t2, t7, s3
- subu s3, t7, s3
- addu t7, s1, s0
- addu v0, s2, t7
- subu s2, s2, t7
- subu s1, s1, t5
- subu s1, s1, s0
- addu s0, t4, s1
- subu t4, t4, s1
-
- addu t5, v0, t3 /* combine the two halves into 12 sum/difference outputs */
- subu v0, v0, t3
- addu t3, t2, t1
- subu t2, t2, t1
- addu t1, s0, t8
- subu s0, s0, t8
- addu t8, t4, t9
- subu t4, t4, t9
- addu t9, s3, t0
- subu s3, s3, t0
- addu t0, s2, t6
- subu s2, s2, t6
- sra t5, t5, 11 /* arithmetic right shift by 11 on every output */
- sra t3, t3, 11
- sra t1, t1, 11
- sra t8, t8, 11
- sra t9, t9, 11
- sra t0, t0, 11
- sra s2, s2, 11
- sra s3, s3, 11
- sra t4, t4, 11
- sra s0, s0, 11
- sra t2, t2, 11
- sra v0, v0, 11
- sw t5, 0(a2) /* 12 output rows, 32 bytes (8 words) apart */
- sw t3, 32(a2)
- sw t1, 64(a2)
- sw t8, 96(a2)
- sw t9, 128(a2)
- sw t0, 160(a2)
- sw s2, 192(a2)
- sw s3, 224(a2)
- sw t4, 256(a2)
- sw s0, 288(a2)
- sw t2, 320(a2)
- sw v0, 352(a2)
- bgtz a3, 1b
- addiu a2, a2, 4 /* delay slot: next output column */
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
- j ra
- nop /* branch delay slot */
- END(jsimd_idct_12x12_pass1_dspr2)
- LEAF_DSPR2(jsimd_idct_12x12_pass2_dspr2) /* Second pass: for each of 12 rows, reads 8 words from a0 (rows 32 bytes apart) and stores 12 output bytes through the pointer loaded from a1 (an array of row pointers). No column offset is added to the row pointer in this routine. */
- SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 /* macro defined outside this excerpt */
- li a3, 12 /* a3 is used as the row counter; any incoming value is discarded */
- 1:
-
- lw t0, 12(a0) /* words 3, 1, 5, 7 of the row */
- lw t1, 4(a0)
- lw t2, 20(a0)
- lw t3, 28(a0)
- li t4, 10703
- li t5, 4433
- mul t4, t0, t4 /* w3 * 10703 */
- mul t5, t0, t5 /* w3 * 4433 */
- addu t6, t1, t2
- li t7, 2139
- mul t7, t6, t7 /* (w1 + w5) * 2139 */
- addu t6, t6, t3
- li t8, 7053
- mul t6, t6, t8 /* (w1 + w5 + w7) * 7053 */
- li t8, 2295
- mul t8, t1, t8 /* w1 * 2295 */
- addu t9, t2, t3
- li s0, 8565
- mul t9, t9, s0 /* (w5 + w7) * 8565 */
- li s0, 12112
- mul s0, t2, s0 /* w5 * 12112 */
- li s1, 12998
- mul s1, t3, s1 /* w7 * 12998 */
- li s2, 5540
- mul s2, t1, s2 /* w1 * 5540 */
- li s3, 16244
- mul s3, t3, s3 /* w7 * 16244 */
- subu t1, t1, t3
- subu t0, t0, t2
- addu t2, t1, t0
- li t3, 4433
- mul t2, t2, t3
- li t3, 6270
- mul t1, t1, t3
- li t3, 15137
- mul t0, t0, t3
- addu t3, t6, t7
- addu t7, t3, t4
- addu t7, t7, t8
- subu t3, t3, t9
- subu t3, t3, t5
- subu t3, t3, s0
- subu t9, t6, t9
- subu t9, t9, t4
- addu t9, t9, s1
- subu t6, t6, t5
- subu t6, t6, s2
- subu t6, t6, s3
- addu t1, t2, t1
- subu t0, t2, t0
-
- lw t2, 16(a0) /* words 4, 2, 0, 6 of the row */
- lw t4, 8(a0)
- lw t5, 0(a0)
- lw t8, 24(a0)
- li s0, 10033
- li s1, 11190
- mul t2, t2, s0 /* w4 * 10033 */
- mul s0, t4, s1 /* w2 * 11190 */
- addiu t5, t5, 0x10 /* +16 before the shift by 13 adds 2^17, half of the 2^18 net down-scale applied below */
- sll t5, t5, 13
- sll t4, t4, 13
- sll t8, t8, 13
- subu s1, t4, t8
- addu s2, t5, t2
- subu t2, t5, t2
- addu s3, t5, s1
- subu s1, t5, s1
- addu t5, s0, t8
- addu v0, s2, t5
- subu t5, s2, t5
- subu t4, s0, t4
- subu t4, t4, t8
- addu t8, t2, t4
- subu t2, t2, t4
-
- addiu a3, a3, -1
- addiu a0, a0, 32 /* next input row */
-
- addu t4, v0, t7 /* combine the two halves into 12 sum/difference outputs */
- subu v0, v0, t7
- addu t7, s3, t1
- subu s3, s3, t1
- addu t1, t8, t3
- subu t8, t8, t3
- addu t3, t2, t9
- subu t2, t2, t9
- addu t9, s1, t0
- subu s1, s1, t0
- addu t0, t5, t6
- subu t5, t5, t6
- sll t4, t4, 4 /* plain shift left 4, then saturating shift left 2, then srl 24: the stored byte is the value shifted right by 18 and clamped to the signed 8-bit range, provided the plain sll does not overflow */
- sll t7, t7, 4
- sll t1, t1, 4
- sll t3, t3, 4
- sll t9, t9, 4
- sll t0, t0, 4
- sll t5, t5, 4
- sll s1, s1, 4
- sll t2, t2, 4
- sll t8, t8, 4
- sll s3, s3, 4
- sll v0, v0, 4
- shll_s.w t4, t4, 2
- shll_s.w t7, t7, 2
- shll_s.w t1, t1, 2
- shll_s.w t3, t3, 2
- shll_s.w t9, t9, 2
- shll_s.w t0, t0, 2
- shll_s.w t5, t5, 2
- shll_s.w s1, s1, 2
- shll_s.w t2, t2, 2
- shll_s.w t8, t8, 2
- shll_s.w s3, s3, 2
- shll_s.w v0, v0, 2
- srl t4, t4, 24 /* logical shift; only the low byte is stored by sb below */
- srl t7, t7, 24
- srl t1, t1, 24
- srl t3, t3, 24
- srl t9, t9, 24
- srl t0, t0, 24
- srl t5, t5, 24
- srl s1, s1, 24
- srl t2, t2, 24
- srl t8, t8, 24
- srl s3, s3, 24
- srl v0, v0, 24
- lw t6, 0(a1) /* t6 = current output row pointer */
- addiu t4, t4, 0x80 /* bias by +128 before the byte stores */
- addiu t7, t7, 0x80
- addiu t1, t1, 0x80
- addiu t3, t3, 0x80
- addiu t9, t9, 0x80
- addiu t0, t0, 0x80
- addiu t5, t5, 0x80
- addiu s1, s1, 0x80
- addiu t2, t2, 0x80
- addiu t8, t8, 0x80
- addiu s3, s3, 0x80
- addiu v0, v0, 0x80
- sb t4, 0(t6) /* 12 consecutive output bytes */
- sb t7, 1(t6)
- sb t1, 2(t6)
- sb t3, 3(t6)
- sb t9, 4(t6)
- sb t0, 5(t6)
- sb t5, 6(t6)
- sb s1, 7(t6)
- sb t2, 8(t6)
- sb t8, 9(t6)
- sb s3, 10(t6)
- sb v0, 11(t6)
- bgtz a3, 1b
- addiu a1, a1, 4 /* delay slot: next output row pointer */
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
- jr ra
- nop /* branch delay slot */
- END(jsimd_idct_12x12_pass2_dspr2)
- LEAF_DSPR2(jsimd_convsamp_dspr2) /* Converts 8 rows of 8 unsigned bytes into 64 signed 16-bit values (byte - 128). a0 = array of 8 row pointers; a1 = byte offset added to each row pointer; a2 = output buffer (128 bytes written). Fully unrolled and software-pipelined: each row's loads overlap the previous row's stores. */
- lw t0, 0(a0) /* row 0 pointer */
- li t7, 0xff80ff80 /* -128 in each halfword lane */
- addu t0, t0, a1 /* apply the byte offset from a1 */
- ulw t1, 0(t0) /* unaligned loads: bytes 0-3 and 4-7 of the row */
- ulw t2, 4(t0)
- preceu.ph.qbr t3, t1 /* zero-extend the two right (low) bytes to halfwords */
- preceu.ph.qbl t4, t1 /* zero-extend the two left (high) bytes to halfwords */
- lw t0, 4(a0) /* row 1 pointer */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7 /* subtract 128 from each halfword lane */
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 0(a2) /* row 0 results: output bytes 0..15 (unaligned stores) */
- usw t4, 4(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 8(a2)
- usw t6, 12(a2)
- lw t0, 8(a0) /* row 2 pointer */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 16(a2) /* row 1 results: output bytes 16..31 */
- usw t4, 20(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 24(a2)
- usw t6, 28(a2)
- lw t0, 12(a0) /* row 3 pointer */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 32(a2) /* row 2 results: output bytes 32..47 */
- usw t4, 36(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 40(a2)
- usw t6, 44(a2)
- lw t0, 16(a0) /* row 4 pointer */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 48(a2) /* row 3 results: output bytes 48..63 */
- usw t4, 52(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 56(a2)
- usw t6, 60(a2)
- lw t0, 20(a0) /* row 5 pointer */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 64(a2) /* row 4 results: output bytes 64..79 */
- usw t4, 68(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 72(a2)
- usw t6, 76(a2)
- lw t0, 24(a0) /* row 6 pointer */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 80(a2) /* row 5 results: output bytes 80..95 */
- usw t4, 84(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 88(a2)
- usw t6, 92(a2)
- lw t0, 28(a0) /* row 7 pointer */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 96(a2) /* row 6 results: output bytes 96..111 */
- usw t4, 100(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 104(a2)
- usw t6, 108(a2)
- preceu.ph.qbr t5, t2 /* pipeline drain: finish row 7 */
- preceu.ph.qbl t6, t2
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 112(a2) /* row 7 results: output bytes 112..127 */
- usw t4, 116(a2)
- usw t5, 120(a2)
- usw t6, 124(a2)
- j ra
- nop /* branch delay slot */
- END(jsimd_convsamp_dspr2)
- #ifndef __mips_soft_float /* uses FPU instructions, so it is compiled out for soft-float builds */
- LEAF_DSPR2(jsimd_convsamp_float_dspr2) /* Converts 8 rows of 8 unsigned bytes into 64 single-precision floats (byte - 128). a0 = array of 8 row pointers; a1 = byte offset added to each row pointer; a2 = output buffer (256 bytes written). Fully unrolled, one 8-sample row per section. */
- .set at
- lw t0, 0(a0) /* row 0 pointer */
- addu t0, t0, a1 /* apply the byte offset from a1 */
- lbu t1, 0(t0) /* load the 8 unsigned bytes of the row */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128 /* subtract 128 from each sample */
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2 /* move the integers into FPU registers f2..f16 */
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2 /* convert 32-bit integer to single-precision float */
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 4(a0) /* row 1 pointer, fetched early */
- swc1 f2, 0(a2) /* row 0 results: output bytes 0..31 */
- swc1 f4, 4(a2)
- swc1 f6, 8(a2)
- addu t0, t0, a1
- swc1 f8, 12(a2)
- swc1 f10, 16(a2)
- swc1 f12, 20(a2)
- swc1 f14, 24(a2)
- swc1 f16, 28(a2)
-
- lbu t1, 0(t0) /* row 1: same sequence */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 8(a0) /* row 2 pointer */
- swc1 f2, 32(a2) /* row 1 results: output bytes 32..63 */
- swc1 f4, 36(a2)
- swc1 f6, 40(a2)
- addu t0, t0, a1
- swc1 f8, 44(a2)
- swc1 f10, 48(a2)
- swc1 f12, 52(a2)
- swc1 f14, 56(a2)
- swc1 f16, 60(a2)
-
- lbu t1, 0(t0) /* row 2 */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 12(a0) /* row 3 pointer */
- swc1 f2, 64(a2) /* row 2 results: output bytes 64..95 */
- swc1 f4, 68(a2)
- swc1 f6, 72(a2)
- addu t0, t0, a1
- swc1 f8, 76(a2)
- swc1 f10, 80(a2)
- swc1 f12, 84(a2)
- swc1 f14, 88(a2)
- swc1 f16, 92(a2)
-
- lbu t1, 0(t0) /* row 3 */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 16(a0) /* row 4 pointer */
- swc1 f2, 96(a2) /* row 3 results: output bytes 96..127 */
- swc1 f4, 100(a2)
- swc1 f6, 104(a2)
- addu t0, t0, a1
- swc1 f8, 108(a2)
- swc1 f10, 112(a2)
- swc1 f12, 116(a2)
- swc1 f14, 120(a2)
- swc1 f16, 124(a2)
-
- lbu t1, 0(t0) /* row 4 */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 20(a0) /* row 5 pointer */
- swc1 f2, 128(a2) /* row 4 results: output bytes 128..159 */
- swc1 f4, 132(a2)
- swc1 f6, 136(a2)
- addu t0, t0, a1
- swc1 f8, 140(a2)
- swc1 f10, 144(a2)
- swc1 f12, 148(a2)
- swc1 f14, 152(a2)
- swc1 f16, 156(a2)
-
- lbu t1, 0(t0) /* row 5 */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 24(a0) /* row 6 pointer */
- swc1 f2, 160(a2) /* row 5 results: output bytes 160..191 */
- swc1 f4, 164(a2)
- swc1 f6, 168(a2)
- addu t0, t0, a1
- swc1 f8, 172(a2)
- swc1 f10, 176(a2)
- swc1 f12, 180(a2)
- swc1 f14, 184(a2)
- swc1 f16, 188(a2)
-
- lbu t1, 0(t0) /* row 6 */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 28(a0) /* row 7 pointer */
- swc1 f2, 192(a2) /* row 6 results: output bytes 192..223 */
- swc1 f4, 196(a2)
- swc1 f6, 200(a2)
- addu t0, t0, a1
- swc1 f8, 204(a2)
- swc1 f10, 208(a2)
- swc1 f12, 212(a2)
- swc1 f14, 216(a2)
- swc1 f16, 220(a2)
-
- lbu t1, 0(t0) /* row 7 */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- swc1 f2, 224(a2) /* row 7 results: output bytes 224..255 */
- swc1 f4, 228(a2)
- swc1 f6, 232(a2)
- swc1 f8, 236(a2)
- swc1 f10, 240(a2)
- swc1 f12, 244(a2)
- swc1 f14, 248(a2)
- swc1 f16, 252(a2)
- j ra
- nop /* branch delay slot */
- END(jsimd_convsamp_float_dspr2)
- #endif
|