6 #define LD_V(RTYPE, psrc) *((RTYPE *)(psrc)) 7 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) 8 #define ST_B2(RTYPE, in0, in1, pdst, stride) \ 10 ST_B(RTYPE, in0, (pdst)); \ 11 ST_B(RTYPE, in1, (pdst) + stride); \ 13 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) 15 #if YUV_FORMAT == YUV_FORMAT_420 17 #define READ_Y(y_ptr) \ 18 y = LD_V(v16i8, y_ptr); \ 21 u_temp = LD_V(v16i8, u_ptr); \ 22 v_temp = LD_V(v16i8, v_ptr); \ 25 #error READ_UV unimplemented 28 #define PACK_RGBA_32(R1, R2, G1, G2, B1, B2, A1, A2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, RGB7, RGB8) \ 30 v8u16 ab_r, ab_l, gr_r, gr_l; \ 31 ab_r = (v8u16)__msa_ilvr_b((v16i8)B1, (v16i8)A1); \ 32 ab_l = (v8u16)__msa_ilvl_b((v16i8)B1, (v16i8)A1); \ 33 gr_r = (v8u16)__msa_ilvr_b((v16i8)R1, (v16i8)G1); \ 34 gr_l = (v8u16)__msa_ilvl_b((v16i8)R1, (v16i8)G1); \ 35 RGB1 = (v16u8)__msa_ilvr_h((v8i16)gr_r, (v8i16)ab_r); \ 36 RGB2 = (v16u8)__msa_ilvl_h((v8i16)gr_r, (v8i16)ab_r); \ 37 RGB3 = (v16u8)__msa_ilvr_h((v8i16)gr_l, (v8i16)ab_l); \ 38 RGB4 = (v16u8)__msa_ilvl_h((v8i16)gr_l, (v8i16)ab_l); \ 39 ab_r = (v8u16)__msa_ilvr_b((v16i8)B2, (v16i8)A2); \ 40 ab_l = (v8u16)__msa_ilvl_b((v16i8)B2, (v16i8)A2); \ 41 gr_r = (v8u16)__msa_ilvr_b((v16i8)R2, (v16i8)G2); \ 42 gr_l = (v8u16)__msa_ilvl_b((v16i8)R2, (v16i8)G2); \ 43 RGB5 = (v16u8)__msa_ilvr_h((v8i16)gr_r, (v8i16)ab_r); \ 44 RGB6 = (v16u8)__msa_ilvl_h((v8i16)gr_r, (v8i16)ab_r); \ 45 RGB7 = (v16u8)__msa_ilvr_h((v8i16)gr_l, (v8i16)ab_l); \ 46 RGB8 = (v16u8)__msa_ilvl_h((v8i16)gr_l, (v8i16)ab_l); \ 49 #define PACK_RGB24_32_STEP(R, G, B, RGB1, RGB2, RGB3) \ 50 RGB1 = __msa_ilvr_b(G, R); \ 51 RGB1 = __msa_vshf_b(mask1, B, RGB1); \ 52 RGB2 = __msa_vshf_b(mask2, B, G); \ 53 RGB2 = __msa_vshf_b(mask3, R, RGB2); \ 54 RGB3 = __msa_vshf_b(mask4, R, B); \ 55 RGB3 = __msa_vshf_b(mask5, G, RGB3); \ 58 #define PACK_RGB24_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \ 59 PACK_RGB24_32_STEP(R1, G1, B1, RGB1, RGB2, RGB3); \ 60 PACK_RGB24_32_STEP(R2, G2, B2, RGB4, RGB5, RGB6); \ 62 #if RGB_FORMAT == RGB_FORMAT_RGB24 65 v16u8 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6; \ 66 v16u8 rgb_7, rgb_8, rgb_9, rgb_10, rgb_11, rgb_12; \ 67 PACK_RGB24_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, \ 68 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6) \ 69 PACK_RGB24_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, \ 70 rgb_7, rgb_8, rgb_9, rgb_10, rgb_11, rgb_12) \ 73 #elif RGB_FORMAT == RGB_FORMAT_RGBA 76 v16u8 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ 77 v16u8 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ 78 v16u8 a = __msa_ldi_b(0xFF); \ 79 PACK_RGBA_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, a, a, \ 80 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ 81 PACK_RGBA_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, a, a, \ 82 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16)\ 84 #elif RGB_FORMAT == RGB_FORMAT_BGRA 87 v16u8 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ 88 v16u8 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ 89 v16u8 a = __msa_ldi_b(0xFF); \ 90 PACK_RGBA_32(b_8_11, b_8_12, g_8_11, g_8_12, r_8_11, r_8_12, a, a, \ 91 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ 92 PACK_RGBA_32(b_8_21, b_8_22, g_8_21, g_8_22, r_8_21, r_8_22, a, a, \ 93 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16)\ 95 #elif RGB_FORMAT == RGB_FORMAT_ARGB 98 v16u8 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ 99 v16u8 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ 100 v16u8 a = __msa_ldi_b(0xFF); \ 101 PACK_RGBA_32(a, a, r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, \ 102 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ 103 PACK_RGBA_32(a, a, r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, \ 104 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16)\ 106 #elif RGB_FORMAT == RGB_FORMAT_ABGR 109 v16u8 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ 110 v16u8 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ 111 v16u8 a = __msa_ldi_b(0xFF); \ 112 PACK_RGBA_32(a, a, b_8_11, b_8_12, g_8_11, g_8_12, r_8_11, r_8_12, \ 113 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ 114 PACK_RGBA_32(a, a, b_8_21, b_8_22, g_8_21, g_8_22, r_8_21, r_8_22, \ 115 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16)\ 118 #error PACK_PIXEL unimplemented 121 #if RGB_FORMAT == RGB_FORMAT_RGB24 \ 124 ST_UB2(rgb_1, rgb_2, rgb_ptr1, 16); \ 125 ST_UB2(rgb_3, rgb_4, rgb_ptr1 + 32, 16); \ 126 ST_UB2(rgb_5, rgb_6, rgb_ptr1 + 64, 16); \ 129 ST_UB2(rgb_7, rgb_8, rgb_ptr2, 16); \ 130 ST_UB2(rgb_9, rgb_10, rgb_ptr2 + 32, 16); \ 131 ST_UB2(rgb_11, rgb_12, rgb_ptr2 + 64, 16); \ 133 #elif RGB_FORMAT == RGB_FORMAT_RGBA || RGB_FORMAT == RGB_FORMAT_BGRA || \ 134 RGB_FORMAT == RGB_FORMAT_ARGB || RGB_FORMAT == RGB_FORMAT_ABGR \ 137 ST_UB2(rgb_1, rgb_2, rgb_ptr1, 16); \ 138 ST_UB2(rgb_3, rgb_4, rgb_ptr1 + 32, 16); \ 139 ST_UB2(rgb_5, rgb_6, rgb_ptr1 + 64, 16); \ 140 ST_UB2(rgb_7, rgb_8, rgb_ptr1 + 96, 16); \ 143 ST_UB2(rgb_9, rgb_10, rgb_ptr2, 16); \ 144 ST_UB2(rgb_11, rgb_12, rgb_ptr2 + 32, 16); \ 145 ST_UB2(rgb_13, rgb_14, rgb_ptr2 + 64, 16); \ 146 ST_UB2(rgb_15, rgb_16, rgb_ptr2 + 96, 16); \ 149 #error SAVE_LINE unimplemented 153 #define UV2RGB_16(U,V,R1,G1,B1,R2,G2,B2) \ 154 r_temp = __msa_mulv_h(V, v2r); \ 155 g_temp = __msa_mulv_h(U, u2g); \ 156 g_temp = __msa_maddv_h(V, v2g, g_temp); \ 157 b_temp = __msa_mulv_h(U, u2b); \ 158 R1 = (v8i16)__msa_ilvr_h((v8i16)r_temp, (v8i16)r_temp); \ 159 G1 = (v8i16)__msa_ilvr_h((v8i16)g_temp, (v8i16)g_temp); \ 160 B1 = (v8i16)__msa_ilvr_h((v8i16)b_temp, (v8i16)b_temp); \ 161 R2 = (v8i16)__msa_ilvl_h((v8i16)r_temp, (v8i16)r_temp); \ 162 G2 = (v8i16)__msa_ilvl_h((v8i16)g_temp, (v8i16)g_temp); \ 163 B2 = (v8i16)__msa_ilvl_h((v8i16)b_temp, (v8i16)b_temp); \ 166 #define ADD_Y2RGB_16(Y1, Y2, R1, G1, B1, R2, G2, B2) \ 169 Y1 = __msa_mulv_h(Y1, yf); \ 170 Y2 = __msa_mulv_h(Y2, yf); \ 177 R1 = __msa_srai_h(R1, PRECISION); \ 178 G1 = __msa_srai_h(G1, PRECISION); \ 179 B1 = __msa_srai_h(B1, PRECISION); \ 180 R2 = __msa_srai_h(R2, PRECISION); \ 181 G2 = __msa_srai_h(G2, PRECISION); \ 182 B2 = __msa_srai_h(B2, PRECISION); \ 185 #define CLIP(in0, in1, in2, in3, in4, in5) \ 187 in0 = __msa_maxi_s_h(in0, 0); \ 188 in1 = __msa_maxi_s_h(in1, 0); \ 189 in2 = __msa_maxi_s_h(in2, 0); \ 190 in3 = __msa_maxi_s_h(in3, 0); \ 191 in4 = __msa_maxi_s_h(in4, 0); \ 192 in5 = __msa_maxi_s_h(in5, 0); \ 193 in0 = __msa_sat_u_h(in0, 7); \ 194 in1 = __msa_sat_u_h(in1, 7); \ 195 in2 = __msa_sat_u_h(in2, 7); \ 196 in3 = __msa_sat_u_h(in3, 7); \ 197 in4 = __msa_sat_u_h(in4, 7); \ 198 in5 = __msa_sat_u_h(in5, 7); \ 202 v16i8 y, u_temp, v_temp; \ 203 v16i8 r_8_11, g_8_11, b_8_11, r_8_21, g_8_21, b_8_21; \ 204 v16i8 r_8_12, g_8_12, b_8_12, r_8_22, g_8_22, b_8_22; \ 205 v8i16 u, v, r_temp, g_temp, b_temp; \ 206 v8i16 r_1, g_1, b_1, r_2, g_2, b_2; \ 208 v8i16 r_uv_1, g_uv_1, b_uv_1, r_uv_2, g_uv_2, b_uv_2; \ 213 u = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)u_temp); \ 214 v = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)v_temp); \ 217 UV2RGB_16(u, v, r_1, g_1, b_1, r_2, g_2, b_2); \ 218 r_uv_1 = r_1; g_uv_1 = g_1; b_uv_1 = b_1; \ 219 r_uv_2 = r_2; g_uv_2 = g_2; b_uv_2 = b_2; \ 221 y_1 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)y); \ 222 y_2 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)y); \ 223 ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ 224 CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ 225 r_8_11 = __msa_pckev_b((v16i8)r_2, (v16i8)r_1); \ 226 g_8_11 = __msa_pckev_b((v16i8)g_2, (v16i8)g_1); \ 227 b_8_11 = __msa_pckev_b((v16i8)b_2, (v16i8)b_1); \ 230 r_1 = r_uv_1; g_1 = g_uv_1; b_1 = b_uv_1; \ 231 r_2 = r_uv_2; g_2 = g_uv_2; b_2 = b_uv_2; \ 234 y_1 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)y); \ 235 y_2 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)y); \ 236 ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ 237 CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ 238 r_8_21 = __msa_pckev_b((v16i8)r_2, (v16i8)r_1); \ 239 g_8_21 = __msa_pckev_b((v16i8)g_2, (v16i8)g_1); \ 240 b_8_21 = __msa_pckev_b((v16i8)b_2, (v16i8)b_1); \ 243 u = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)u_temp); \ 244 v = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)v_temp); \ 247 UV2RGB_16(u, v, r_1, g_1, b_1, r_2, g_2, b_2); \ 248 r_uv_1 = r_1; g_uv_1 = g_1; b_uv_1 = b_1; \ 249 r_uv_2 = r_2; g_uv_2 = g_2; b_uv_2 = b_2; \ 250 READ_Y(y_ptr1 + 16 * y_pixel_stride) \ 251 y_1 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)y); \ 252 y_2 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)y); \ 253 ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ 254 CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ 255 r_8_12 = __msa_pckev_b((v16i8)r_2, (v16i8)r_1); \ 256 g_8_12 = __msa_pckev_b((v16i8)g_2, (v16i8)g_1); \ 257 b_8_12 = __msa_pckev_b((v16i8)b_2, (v16i8)b_1); \ 260 r_1 = r_uv_1; g_1 = g_uv_1; b_1 = b_uv_1; \ 261 r_2 = r_uv_2; g_2 = g_uv_2; b_2 = b_uv_2; \ 263 READ_Y(y_ptr2 + 16 * y_pixel_stride) \ 264 y_1 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)y); \ 265 y_2 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)y); \ 266 ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ 267 CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ 268 r_8_22 = __msa_pckev_b((v16i8)r_2, (v16i8)r_1); \ 269 g_8_22 = __msa_pckev_b((v16i8)g_2, (v16i8)g_1); \ 270 b_8_22 = __msa_pckev_b((v16i8)b_2, (v16i8)b_1); \ 280 #if YUV_FORMAT == YUV_FORMAT_420 286 #if RGB_FORMAT == RGB_FORMAT_RGB565 287 const int rgb_pixel_stride = 2;
288 #elif RGB_FORMAT == RGB_FORMAT_RGB24 289 const int rgb_pixel_stride = 3;
290 v16i8 mask1 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10};
291 v16i8 mask2 = {5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 0, 0, 0, 0, 0};
292 v16i8 mask3 = {0, 1, 22, 2, 3, 23, 4, 5, 24, 6, 7, 25, 8, 9, 26, 10};
293 v16i8 mask4 = {10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15, 0, 0, 0, 0, 0};
294 v16i8 mask5 = {0, 1, 27, 2, 3, 28, 4, 5, 29, 6, 7, 30, 8, 9, 31, 10};
295 #elif RGB_FORMAT == RGB_FORMAT_RGBA || RGB_FORMAT_BGRA || \ 296 RGB_FORMAT == RGB_FORMAT_ARGB || RGB_FORMAT_ABGR 297 const int rgb_pixel_stride = 4;
299 #error Unknown RGB pixel size 307 v8i16
bias = __msa_fill_h(128);
308 v8i16 shift = __msa_fill_h(param->
y_shift);
309 v8i16 yf = __msa_fill_h(param->
y_factor);
314 const uint8_t *y_ptr1 = Y + ypos * Y_stride,
315 *y_ptr2 = Y + (ypos + 1) * Y_stride,
316 *u_ptr = U + (ypos/uv_y_sample_interval) * UV_stride,
318 uint8_t *rgb_ptr1 = RGB + ypos * RGB_stride,
319 *rgb_ptr2 = RGB + (ypos + 1) * RGB_stride;
321 for (xpos = 0; xpos < (width - 31); xpos += 32)
327 if (uv_y_sample_interval > 1)
337 rgb_ptr1 += 32 * rgb_pixel_stride;
338 rgb_ptr2 += 32 * rgb_pixel_stride;
342 if (uv_y_sample_interval == 2 && ypos == (height - 1)) {
343 const uint8_t *y_ptr = Y + ypos * Y_stride,
345 *v_ptr = V + (ypos/uv_y_sample_interval) * UV_stride;
346 uint8_t *rgb_ptr = RGB + ypos * RGB_stride;
348 STD_FUNCTION_NAME(width, 1, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type);
353 int converted = (width & ~31);
354 if (converted != width)
359 uint8_t *rgb_ptr = RGB + converted * rgb_pixel_stride;
361 STD_FUNCTION_NAME(width-converted, height, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type);
366 #undef MSA_FUNCTION_NAME 367 #undef STD_FUNCTION_NAME 377 #undef PACK_RGB24_32_STEP
#define uv_x_sample_interval
GLint GLint GLsizei width
void STD_FUNCTION_NAME(uint32_t width, uint32_t height, const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, uint8_t *RGB, uint32_t RGB_stride, YCbCrType yuv_type)
static const YUV2RGBParam YUV2RGB[3]
GLint GLint GLsizei GLsizei height
#define uv_y_sample_interval
void MSA_FUNCTION_NAME(uint32_t width, uint32_t height, const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, uint8_t *RGB, uint32_t RGB_stride, YCbCrType yuv_type)