123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324 |
- %include "jsimdext.inc"
- SECTION SEG_TEXT
- BITS 32
; --------------------------------------------------------------------------
; jsimd_h2v1_downsample_mmx
;
; Halves the horizontal resolution of a set of sample rows with MMX: every
; output byte is (left + right + bias) >> 1 of two horizontally adjacent
; input bytes, where the bias alternates 0, 1, 0, 1, ... across output
; columns.  Before downsampling, each input row is padded on the right by
; replicating its last real sample so that a whole number of 16-byte input
; groups can be read.
;
; Arguments are read from the stack relative to ebp (after the prologue):
;   img_width, max_v_samp, v_samp, width_blks, input_data, output_data.
; Saved/restored: ebp, esi, edi.
; Overwritten without being restored: eax, ecx, edx, mm0-mm3, mm6, mm7, flags.
; The direction flag is cleared (cld) when the padding pass runs.
; NOTE(review): comments below assume BYTE_BIT = 8 and SIZEOF_MMWORD = 8 as
; presumably defined by jsimdext.inc - confirm against that include.
; --------------------------------------------------------------------------
- %define img_width(b) (b) + 8          ; JDIMENSION: number of real samples per input row
- %define max_v_samp(b) (b) + 12        ; INT: number of input rows to pad on the right
- %define v_samp(b) (b) + 16            ; JDIMENSION: number of rows processed by the row loop
- %define width_blks(b) (b) + 20        ; JDIMENSION: output width in units of 8 columns
- %define input_data(b) (b) + 24        ; JSAMPARRAY: array of input row pointers
- %define output_data(b) (b) + 28       ; JSAMPARRAY: array of output row pointers
- align 32
- GLOBAL_FUNCTION(jsimd_h2v1_downsample_mmx)
- EXTN(jsimd_h2v1_downsample_mmx):
- push ebp
- mov ebp, esp                          ; ebp = frame base used by the argument macros
- push esi
- push edi
- mov ecx, JDIMENSION [width_blks(ebp)]
- shl ecx, 3                            ; ecx = width_blks * 8 = output columns per row
- jz near .return                       ; nothing to do for a zero-width output
- mov edx, JDIMENSION [img_width(ebp)]  ; edx = img_width
-
; ---- Right-edge expansion: pad each input row out to 2 * output columns ----
- push ecx                              ; keep the output column count for later
- shl ecx, 1                            ; ecx = input columns required (2 per output column)
- sub ecx, edx                          ; ecx = number of padding bytes needed per row
- jle short .expand_end                 ; rows are already wide enough
- mov eax, INT [max_v_samp(ebp)]        ; eax = rows left to pad
- test eax, eax
- jle short .expand_end                 ; no rows to pad
- cld                                   ; make rep stosb walk forward
- mov esi, JSAMPARRAY [input_data(ebp)] ; esi -> current entry in the input row-pointer array
- alignx 16, 7
- .expandloop:
- push eax                              ; al is about to be overwritten with the fill value
- push ecx                              ; rep stosb consumes ecx
- mov edi, JSAMPROW [esi]               ; edi = start of this input row
- add edi, edx                          ; edi = first byte past the real samples
- mov al, JSAMPLE [edi-1]               ; al = last real sample of the row
- rep stosb                             ; write ecx copies of al starting at edi
- pop ecx
- pop eax
- add esi, byte SIZEOF_JSAMPROW         ; next row pointer
- dec eax
- jg short .expandloop                  ; loop while rows remain
- .expand_end:
- pop ecx                               ; ecx = output columns per row again
-
; ---- Downsample: one output row from each input row ----
- mov eax, JDIMENSION [v_samp(ebp)]     ; eax = row counter
- test eax, eax
- jle near .return
- mov edx, 0x00010000                   ; low word 0, high word 1
- movd mm7, edx
- pcmpeqw mm6, mm6                      ; mm6 = all ones
- punpckldq mm7, mm7                    ; mm7 = words {0, 1, 0, 1} (low to high): rounding bias
- psrlw mm6, BYTE_BIT                   ; mm6 = low-byte mask in every word
- mov esi, JSAMPARRAY [input_data(ebp)] ; esi -> input row-pointer array
- mov edi, JSAMPARRAY [output_data(ebp)] ; edi -> output row-pointer array
- alignx 16, 7
- .rowloop:
- push ecx                              ; column loop counts ecx down to zero
- push edi                              ; save array cursors; esi/edi become sample pointers
- push esi
- mov esi, JSAMPROW [esi]               ; esi = input row samples
- mov edi, JSAMPROW [edi]               ; edi = output row samples
- alignx 16, 7
- .columnloop:
; Each iteration reads two MMWORDs of input and writes one MMWORD of output.
- movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
- movq mm1, MMWORD [esi+1*SIZEOF_MMWORD]
- movq mm2, mm0
- movq mm3, mm1
- pand mm0, mm6                         ; mm0 = even-indexed bytes, widened to words
- psrlw mm2, BYTE_BIT                   ; mm2 = odd-indexed bytes, widened to words
- pand mm1, mm6                         ; same split for the second MMWORD
- psrlw mm3, BYTE_BIT
- paddw mm0, mm2                        ; sum of each horizontal pair
- paddw mm1, mm3
- paddw mm0, mm7                        ; add alternating 0/1 bias
- paddw mm1, mm7
- psrlw mm0, 1                          ; divide by 2
- psrlw mm1, 1
- packuswb mm0, mm1                     ; narrow the eight word results back to bytes
- movq MMWORD [edi+0*SIZEOF_MMWORD], mm0
- add esi, byte 2*SIZEOF_MMWORD         ; consumed two MMWORDs of input
- add edi, byte 1*SIZEOF_MMWORD         ; produced one MMWORD of output
- sub ecx, byte SIZEOF_MMWORD           ; output columns remaining in this row
- jnz short .columnloop
- pop esi                               ; restore row-pointer array cursors
- pop edi
- pop ecx                               ; restore output columns per row
- add esi, byte SIZEOF_JSAMPROW         ; advance one input row
- add edi, byte SIZEOF_JSAMPROW         ; advance one output row
- dec eax
- jg short .rowloop
- emms                                  ; leave MMX state so x87 code can run afterwards
- .return:
- pop edi
- pop esi
- pop ebp
- ret
; --------------------------------------------------------------------------
; jsimd_h2v2_downsample_mmx
;
; Halves both the horizontal and vertical resolution of a set of sample rows
; with MMX: every output byte is (sum of a 2x2 input block + bias) >> 2,
; where the bias alternates 1, 2, 1, 2, ... across output columns.  Two
; consecutive input rows are consumed for each output row.  Before
; downsampling, each input row is padded on the right by replicating its
; last real sample so that a whole number of 16-byte input groups can be
; read.
;
; Arguments are read from the stack relative to ebp (after the prologue):
;   img_width, max_v_samp, v_samp, width_blks, input_data, output_data.
; Saved/restored: ebp, esi, edi.
; Overwritten without being restored: eax, ecx, edx, mm0-mm7, flags.
; The direction flag is cleared (cld) when the padding pass runs.
; NOTE(review): comments below assume BYTE_BIT = 8 and SIZEOF_MMWORD = 8 as
; presumably defined by jsimdext.inc - confirm against that include.
; --------------------------------------------------------------------------
- %define img_width(b) (b) + 8          ; JDIMENSION: number of real samples per input row
- %define max_v_samp(b) (b) + 12        ; INT: number of input rows to pad on the right
- %define v_samp(b) (b) + 16            ; JDIMENSION: number of output rows produced by the row loop
- %define width_blks(b) (b) + 20        ; JDIMENSION: output width in units of 8 columns
- %define input_data(b) (b) + 24        ; JSAMPARRAY: array of input row pointers
- %define output_data(b) (b) + 28       ; JSAMPARRAY: array of output row pointers
- align 32
- GLOBAL_FUNCTION(jsimd_h2v2_downsample_mmx)
- EXTN(jsimd_h2v2_downsample_mmx):
- push ebp
- mov ebp, esp                          ; ebp = frame base used by the argument macros
- push esi
- push edi
- mov ecx, JDIMENSION [width_blks(ebp)]
- shl ecx, 3                            ; ecx = width_blks * 8 = output columns per row
- jz near .return                       ; nothing to do for a zero-width output
- mov edx, JDIMENSION [img_width(ebp)]  ; edx = img_width
-
; ---- Right-edge expansion: pad each input row out to 2 * output columns ----
- push ecx                              ; keep the output column count for later
- shl ecx, 1                            ; ecx = input columns required (2 per output column)
- sub ecx, edx                          ; ecx = number of padding bytes needed per row
- jle short .expand_end                 ; rows are already wide enough
- mov eax, INT [max_v_samp(ebp)]        ; eax = rows left to pad
- test eax, eax
- jle short .expand_end                 ; no rows to pad
- cld                                   ; make rep stosb walk forward
- mov esi, JSAMPARRAY [input_data(ebp)] ; esi -> current entry in the input row-pointer array
- alignx 16, 7
- .expandloop:
- push eax                              ; al is about to be overwritten with the fill value
- push ecx                              ; rep stosb consumes ecx
- mov edi, JSAMPROW [esi]               ; edi = start of this input row
- add edi, edx                          ; edi = first byte past the real samples
- mov al, JSAMPLE [edi-1]               ; al = last real sample of the row
- rep stosb                             ; write ecx copies of al starting at edi
- pop ecx
- pop eax
- add esi, byte SIZEOF_JSAMPROW         ; next row pointer
- dec eax
- jg short .expandloop                  ; loop while rows remain
- .expand_end:
- pop ecx                               ; ecx = output columns per row again
-
; ---- Downsample: one output row from each pair of input rows ----
- mov eax, JDIMENSION [v_samp(ebp)]     ; eax = output row counter
- test eax, eax
- jle near .return
- mov edx, 0x00020001                   ; low word 1, high word 2
- movd mm7, edx
- pcmpeqw mm6, mm6                      ; mm6 = all ones
- punpckldq mm7, mm7                    ; mm7 = words {1, 2, 1, 2} (low to high): rounding bias
- psrlw mm6, BYTE_BIT                   ; mm6 = low-byte mask in every word
- mov esi, JSAMPARRAY [input_data(ebp)] ; esi -> input row-pointer array
- mov edi, JSAMPARRAY [output_data(ebp)] ; edi -> output row-pointer array
- alignx 16, 7
- .rowloop:
- push ecx                              ; column loop counts ecx down to zero
- push edi                              ; save array cursors; edx/esi/edi become sample pointers
- push esi
- mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; edx = upper input row samples
- mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; esi = lower input row samples
- mov edi, JSAMPROW [edi]               ; edi = output row samples
- alignx 16, 7
- .columnloop:
; Each iteration reads two MMWORDs from each input row and writes one MMWORD.
- movq mm0, MMWORD [edx+0*SIZEOF_MMWORD] ; upper row, first MMWORD
- movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] ; lower row, first MMWORD
- movq mm2, MMWORD [edx+1*SIZEOF_MMWORD] ; upper row, second MMWORD
- movq mm3, MMWORD [esi+1*SIZEOF_MMWORD] ; lower row, second MMWORD
- movq mm4, mm0
- movq mm5, mm1
- pand mm0, mm6                         ; even-indexed bytes, widened to words
- psrlw mm4, BYTE_BIT                   ; odd-indexed bytes, widened to words
- pand mm1, mm6
- psrlw mm5, BYTE_BIT
- paddw mm0, mm4                        ; horizontal pair sums, upper row (first MMWORD)
- paddw mm1, mm5                        ; horizontal pair sums, lower row (first MMWORD)
- movq mm4, mm2                         ; mm4/mm5 are reused as scratch for the second MMWORD
- movq mm5, mm3
- pand mm2, mm6
- psrlw mm4, BYTE_BIT
- pand mm3, mm6
- psrlw mm5, BYTE_BIT
- paddw mm2, mm4                        ; horizontal pair sums, upper row (second MMWORD)
- paddw mm3, mm5                        ; horizontal pair sums, lower row (second MMWORD)
- paddw mm0, mm1                        ; add upper and lower rows: 2x2 block sums
- paddw mm2, mm3
- paddw mm0, mm7                        ; add alternating 1/2 bias
- paddw mm2, mm7
- psrlw mm0, 2                          ; divide by 4
- psrlw mm2, 2
- packuswb mm0, mm2                     ; narrow the eight word results back to bytes
- movq MMWORD [edi+0*SIZEOF_MMWORD], mm0
- add edx, byte 2*SIZEOF_MMWORD         ; consumed two MMWORDs from the upper row
- add esi, byte 2*SIZEOF_MMWORD         ; consumed two MMWORDs from the lower row
- add edi, byte 1*SIZEOF_MMWORD         ; produced one MMWORD of output
- sub ecx, byte SIZEOF_MMWORD           ; output columns remaining in this row
- jnz near .columnloop
- pop esi                               ; restore row-pointer array cursors
- pop edi
- pop ecx                               ; restore output columns per row
- add esi, byte 2*SIZEOF_JSAMPROW       ; advance two input rows
- add edi, byte 1*SIZEOF_JSAMPROW       ; advance one output row
- dec eax
- jg near .rowloop
- emms                                  ; leave MMX state so x87 code can run afterwards
- .return:
- pop edi
- pop esi
- pop ebp
- ret
- align 32
|