MIPS: dspr2: added optimization for function ClampedAddSubtractHalf
Change-Id: Iec22e897a4f56e79c18ec00f8caa9cefac67f186
This commit is contained in:
		@@ -65,13 +65,6 @@ static const int kC2 = 35468;
 | 
			
		||||
  "ulw              %["#O2"],  64(%[dst])                     \n\t"            \
 | 
			
		||||
  "ulw              %["#O3"],  96(%[dst])                     \n\t"
 | 
			
		||||
 | 
			
		||||
// Emits two asm instructions on the named operands: addq.ph stores I0 + I1
// into O0, then subq.ph stores I0 - I1 into O1. Going by the '.ph' mnemonics
// of the MIPS DSP ASE, both act on the packed 16-bit halves of each register.
// Every argument is an asm operand name; it is stringified into "%[name]".
// O0 must not name the same operand as I0 or I1: subq.ph reads both inputs
// after addq.ph has already written O0.
// O - output
 | 
			
		||||
// I - input (macro doesn't change it)
 | 
			
		||||
#define ADD_SUB_HALVES(O0, O1,                                                 \
 | 
			
		||||
                       I0, I1)                                                 \
 | 
			
		||||
  "addq.ph          %["#O0"],   %["#I0"],  %["#I1"]           \n\t"            \
 | 
			
		||||
  "subq.ph          %["#O1"],   %["#I0"],  %["#I1"]           \n\t"
 | 
			
		||||
 | 
			
		||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
 | 
			
		||||
  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
 | 
			
		||||
 | 
			
		||||
@@ -517,17 +510,7 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
 | 
			
		||||
  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#undef OUTPUT_EARLY_CLOBBER_REGS_18
 | 
			
		||||
#undef OUTPUT_EARLY_CLOBBER_REGS_10
 | 
			
		||||
#undef INSERT_HALF_X2
 | 
			
		||||
#undef SRA_16
 | 
			
		||||
#undef LOAD_IN_X2
 | 
			
		||||
#undef ADD_SUB_HALVES
 | 
			
		||||
#undef MUL_SHIFT_SUM
 | 
			
		||||
#undef PACK_2_HALVES_TO_WORD
 | 
			
		||||
#undef LOAD_DST
 | 
			
		||||
#undef CONVERT_2_BYTES_TO_HALF
 | 
			
		||||
#undef SHIFT_R_SUM_X2
 | 
			
		||||
#undef STORE_SAT_SUM_X2
 | 
			
		||||
#undef MUL
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -29,6 +29,14 @@ static const int kC2 = 35468;
 | 
			
		||||
  "ulw              %["#O2"],  32(%[ref])                     \n\t"            \
 | 
			
		||||
  "ulw              %["#O3"],  48(%[ref])                     \n\t"
 | 
			
		||||
 | 
			
		||||
// temp0[31..16 | 15..0] = temp0[31..16 | 15..0] + temp8[31..16 | 15..0]
 | 
			
		||||
// temp0[31..16 | 15..0] = temp0[31..16 <<(s) 7 | 15..0 <<(s) 7]
 | 
			
		||||
// temp1..temp7 same as temp0
 | 
			
		||||
// precrqu_s.qb.ph temp0, temp1, temp0:
 | 
			
		||||
//   temp0 = temp1[31..24] | temp1[15..8] | temp0[31..24] | temp0[15..8]
 | 
			
		||||
// store temp0 to dst
 | 
			
		||||
// IO - input/output
 | 
			
		||||
// I - input (macro doesn't change it)
 | 
			
		||||
#define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7,               \
 | 
			
		||||
                         I0, I1, I2, I3, I4, I5, I6, I7)                       \
 | 
			
		||||
  "addq.ph          %["#IO0"],  %["#IO0"],  %["#I0"]          \n\t"            \
 | 
			
		||||
@@ -128,19 +136,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#undef OUTPUT_EARLY_CLOBBER_REGS_18
 | 
			
		||||
#undef OUTPUT_EARLY_CLOBBER_REGS_10
 | 
			
		||||
#undef INSERT_HALF_X2
 | 
			
		||||
#undef SRA_16
 | 
			
		||||
#undef LOAD_IN_X2
 | 
			
		||||
#undef ADD_SUB_HALVES
 | 
			
		||||
#undef MUL_SHIFT_SUM
 | 
			
		||||
#undef PACK_2_HALVES_TO_WORD
 | 
			
		||||
#undef LOAD_REF
 | 
			
		||||
#undef CONVERT_2_BYTES_TO_HALF
 | 
			
		||||
#undef SHIFT_R_SUM_X2
 | 
			
		||||
#undef STORE_SAT_SUM_X2
 | 
			
		||||
#undef MUL
 | 
			
		||||
 | 
			
		||||
#endif  // WEBP_USE_MIPS_DSP_R2
 | 
			
		||||
 | 
			
		||||
@@ -152,5 +149,5 @@ extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void);
 | 
			
		||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void) {
 | 
			
		||||
#if defined(WEBP_USE_MIPS_DSP_R2)
 | 
			
		||||
  VP8ITransform = ITransform;
 | 
			
		||||
#endif  // WEBP_USE_MIPS32
 | 
			
		||||
#endif  // WEBP_USE_MIPS_DSP_R2
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -90,6 +90,40 @@ MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
 | 
			
		||||
 | 
			
		||||
#undef MAP_COLOR_FUNCS
 | 
			
		||||
 | 
			
		||||
// MIPS DSP R2 inline-assembly implementation of ClampedAddSubtractHalf.
// Takes three packed 32-bit words and returns the packed 32-bit word that the
// asm leaves in tmp1.
// Reading the instruction sequence with MIPS DSP ASE semantics, every byte
// lane of the result is
//   clamp_to_0_255(avg + (avg - c2) / 2)
// where avg is the truncated per-byte average of c0 and c1 and the division
// by two rounds toward zero.
// NOTE(review): presumably meant to be bit-exact with the portable C version
// of this function, which is not visible here -- confirm against it.
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
 | 
			
		||||
                                                   uint32_t c2) {
 | 
			
		||||
  // Scratch registers for the asm; several are reused for different
  // intermediate values as the sequence progresses.
  int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
 | 
			
		||||
  __asm__ volatile (
 | 
			
		||||
   // tmp5 = per-byte (c0 + c1) >> 1, i.e. the average of c0 and c1.
   "adduh.qb         %[tmp5], %[c0],   %[c1]       \n\t"
 | 
			
		||||
   // Widen bytes to 16-bit lanes so the differences below can go negative:
   // tmp3/tmp4 = low/high byte pair of c2, tmp1/tmp2 = low/high pair of avg.
   "preceu.ph.qbr    %[tmp3], %[c2]                \n\t"
 | 
			
		||||
   "preceu.ph.qbr    %[tmp1], %[tmp5]              \n\t"
 | 
			
		||||
   "preceu.ph.qbl    %[tmp2], %[tmp5]              \n\t"
 | 
			
		||||
   "preceu.ph.qbl    %[tmp4], %[c2]                \n\t"
 | 
			
		||||
   // tmp3/tmp4 = avg - c2 for the low/high lane pairs.
   "subq.ph          %[tmp3], %[tmp1], %[tmp3]     \n\t"
 | 
			
		||||
   "subq.ph          %[tmp4], %[tmp2], %[tmp4]     \n\t"
 | 
			
		||||
   // tmp5/tmp6 = sign bit of each difference (1 when negative, else 0).
   "shrl.ph          %[tmp5], %[tmp3], 15          \n\t"
 | 
			
		||||
   "shrl.ph          %[tmp6], %[tmp4], 15          \n\t"
 | 
			
		||||
   // Adding the sign bit before the arithmetic shift makes the halving round
   // toward zero instead of toward minus infinity.
   "addq.ph          %[tmp3], %[tmp3], %[tmp5]     \n\t"
 | 
			
		||||
   "addq.ph          %[tmp4], %[tmp6], %[tmp4]     \n\t"
 | 
			
		||||
   "shra.ph          %[tmp3], %[tmp3], 1           \n\t"
 | 
			
		||||
   "shra.ph          %[tmp4], %[tmp4], 1           \n\t"
 | 
			
		||||
   // tmp1/tmp2 = avg + (avg - c2) / 2, still unclamped 16-bit values.
   "addq.ph          %[tmp1], %[tmp1], %[tmp3]     \n\t"
 | 
			
		||||
   "addq.ph          %[tmp2], %[tmp2], %[tmp4]     \n\t"
 | 
			
		||||
   // Saturating shift left by 7 moves each lane into the position the
   // saturating pack below reads from; the two steps together give the
   // [0, 255] clamp.
   "shll_s.ph        %[tmp1], %[tmp1], 7           \n\t"
 | 
			
		||||
   "shll_s.ph        %[tmp2], %[tmp2], 7           \n\t"
 | 
			
		||||
   // Pack the four lanes back into one word: tmp2's pair becomes the two
   // upper bytes and tmp1's pair the two lower bytes of tmp1.
   "precrqu_s.qb.ph  %[tmp1], %[tmp2], %[tmp1]     \n\t"
 | 
			
		||||
   // Outputs. tmp1..tmp5 are early-clobber ('&') because they are written
   // while c0/c1/c2 are still needed.
   : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),
 | 
			
		||||
     // tmp6 has no '&': its first write (second shrl.ph) comes after the last
     // read of any input, so it may share a register with one of them.
     [tmp4]"=&r"(tmp4), [tmp5]"=&r"(tmp5), [tmp6]"=r"(tmp6)
 | 
			
		||||
   // Inputs, read-only.
   : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
 | 
			
		||||
   // NOTE(review): none of the instructions above has a memory operand, so
   // this clobber looks purely conservative -- confirm before relying on it.
   : "memory"
 | 
			
		||||
  );
 | 
			
		||||
  // The packed result is converted from int to the uint32_t return type.
  return tmp1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Predictor 13: hands ClampedAddSubtractHalf the left value, the value at
// top[0] and the value stored just before it (top[-1]), and returns whatever
// that helper produces.
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  const uint32_t above = top[0];
  const uint32_t above_prev = top[-1];
  return ClampedAddSubtractHalf(left, above, above_prev);
}
 | 
			
		||||
 | 
			
		||||
#endif  // WEBP_USE_MIPS_DSP_R2
 | 
			
		||||
 | 
			
		||||
//------------------------------------------------------------------------------
 | 
			
		||||
@@ -100,6 +134,7 @@ void VP8LDspInitMIPSdspR2(void) {
 | 
			
		||||
#if defined(WEBP_USE_MIPS_DSP_R2)
 | 
			
		||||
  VP8LMapColor32b = MapARGB;
 | 
			
		||||
  VP8LMapColor8b = MapAlpha;
 | 
			
		||||
  VP8LPredictors[13] = Predictor13;
 | 
			
		||||
#endif  // WEBP_USE_MIPS_DSP_R2
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user