123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228 |
- void jsimd_rgb_gray_convert_altivec(JDIMENSION img_width, JSAMPARRAY input_buf,
- JSAMPIMAGE output_buf,
- JDIMENSION output_row, int num_rows)
- {
- JSAMPROW inptr, outptr;
- int pitch = img_width * RGB_PIXELSIZE, num_cols;
- #if __BIG_ENDIAN__
- int offset;
- unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16];
- #endif
- __vector unsigned char rgb0, rgb1 = { 0 }, rgb2 = { 0 },
- rgbg0, rgbg1, rgbg2, rgbg3, y;
- #if __BIG_ENDIAN__ || RGB_PIXELSIZE == 4
- __vector unsigned char rgb3 = { 0 };
- #endif
- #if __BIG_ENDIAN__ && RGB_PIXELSIZE == 4
- __vector unsigned char rgb4 = { 0 };
- #endif
- __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3;
- __vector unsigned short yl, yh;
- __vector int y0, y1, y2, y3;
-
- __vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) },
- pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) };
- __vector int pd_onehalf = { __4X(ONE_HALF) };
- __vector unsigned char pb_zero = { __16X(0) },
- #if __BIG_ENDIAN__
- shift_pack_index =
- { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
- #else
- shift_pack_index =
- { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
- #endif
- while (--num_rows >= 0) {
- inptr = *input_buf++;
- outptr = output_buf[0][output_row];
- output_row++;
- for (num_cols = pitch; num_cols > 0;
- num_cols -= RGB_PIXELSIZE * 16, inptr += RGB_PIXELSIZE * 16,
- outptr += 16) {
- #if __BIG_ENDIAN__
-
- offset = (size_t)inptr & 15;
- if (offset) {
- __vector unsigned char unaligned_shift_index;
- int bytes = num_cols + offset;
- if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) {
-
- memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
- rgb0 = vec_ld(0, tmpbuf);
- rgb1 = vec_ld(16, tmpbuf);
- rgb2 = vec_ld(32, tmpbuf);
- #if RGB_PIXELSIZE == 4
- rgb3 = vec_ld(48, tmpbuf);
- #endif
- } else {
-
- rgb0 = vec_ld(0, inptr);
- if (bytes > 16)
- rgb1 = vec_ld(16, inptr);
- if (bytes > 32)
- rgb2 = vec_ld(32, inptr);
- if (bytes > 48)
- rgb3 = vec_ld(48, inptr);
- #if RGB_PIXELSIZE == 4
- if (bytes > 64)
- rgb4 = vec_ld(64, inptr);
- #endif
- unaligned_shift_index = vec_lvsl(0, inptr);
- rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
- rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
- rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index);
- #if RGB_PIXELSIZE == 4
- rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index);
- #endif
- }
- } else {
- if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) {
-
- memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
- rgb0 = vec_ld(0, tmpbuf);
- rgb1 = vec_ld(16, tmpbuf);
- rgb2 = vec_ld(32, tmpbuf);
- #if RGB_PIXELSIZE == 4
- rgb3 = vec_ld(48, tmpbuf);
- #endif
- } else {
-
- rgb0 = vec_ld(0, inptr);
- if (num_cols > 16)
- rgb1 = vec_ld(16, inptr);
- if (num_cols > 32)
- rgb2 = vec_ld(32, inptr);
- #if RGB_PIXELSIZE == 4
- if (num_cols > 48)
- rgb3 = vec_ld(48, inptr);
- #endif
- }
- }
- #else
-
- rgb0 = vec_vsx_ld(0, inptr);
- if (num_cols > 16)
- rgb1 = vec_vsx_ld(16, inptr);
- if (num_cols > 32)
- rgb2 = vec_vsx_ld(32, inptr);
- #if RGB_PIXELSIZE == 4
- if (num_cols > 48)
- rgb3 = vec_vsx_ld(48, inptr);
- #endif
- #endif
- #if RGB_PIXELSIZE == 3
-
- rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX0);
- rgbg1 = vec_perm(rgb0, rgb1, (__vector unsigned char)RGBG_INDEX1);
- rgbg2 = vec_perm(rgb1, rgb2, (__vector unsigned char)RGBG_INDEX2);
- rgbg3 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX3);
- #else
-
- rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX);
- rgbg1 = vec_perm(rgb1, rgb1, (__vector unsigned char)RGBG_INDEX);
- rgbg2 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX);
- rgbg3 = vec_perm(rgb3, rgb3, (__vector unsigned char)RGBG_INDEX);
- #endif
-
- rg0 = (__vector signed short)VEC_UNPACKHU(rgbg0);
- bg0 = (__vector signed short)VEC_UNPACKLU(rgbg0);
- rg1 = (__vector signed short)VEC_UNPACKHU(rgbg1);
- bg1 = (__vector signed short)VEC_UNPACKLU(rgbg1);
- rg2 = (__vector signed short)VEC_UNPACKHU(rgbg2);
- bg2 = (__vector signed short)VEC_UNPACKLU(rgbg2);
- rg3 = (__vector signed short)VEC_UNPACKHU(rgbg3);
- bg3 = (__vector signed short)VEC_UNPACKLU(rgbg3);
-
-
- y0 = vec_msums(rg0, pw_f0299_f0337, pd_onehalf);
- y1 = vec_msums(rg1, pw_f0299_f0337, pd_onehalf);
- y2 = vec_msums(rg2, pw_f0299_f0337, pd_onehalf);
- y3 = vec_msums(rg3, pw_f0299_f0337, pd_onehalf);
- y0 = vec_msums(bg0, pw_f0114_f0250, y0);
- y1 = vec_msums(bg1, pw_f0114_f0250, y1);
- y2 = vec_msums(bg2, pw_f0114_f0250, y2);
- y3 = vec_msums(bg3, pw_f0114_f0250, y3);
-
- yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
- shift_pack_index);
- yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
- shift_pack_index);
- y = vec_pack(yl, yh);
- vec_st(y, 0, outptr);
- }
- }
- }
|