Add MIPS MSA vpx_minmax_8x8 function

Average improvement: ~4x-5x.

Change-Id: I83aee9977534fddb8a9b80d31af646c0b6b1a8c3
This commit is contained in:
Kaustubh Raste 2017-01-31 10:00:43 +05:30
parent 280ad35553
commit df7e1fecc1
3 changed files with 53 additions and 1 deletions

View File

@ -127,4 +127,9 @@ INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
::testing::Values(&vpx_minmax_8x8_neon));
#endif
// When the build has MSA support (HAVE_MSA), instantiate the MinMaxTest
// suite with vpx_minmax_8x8_msa as the function under test.
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(MSA, MinMaxTest,
::testing::Values(&vpx_minmax_8x8_msa));
#endif
} // namespace

View File

@ -677,3 +677,50 @@ int vpx_vector_var_msa(const int16_t *ref, const int16_t *src, const int bwl) {
return var;
}
// Finds the smallest and largest absolute byte difference between two blocks,
// each read as eight rows: `s` with row stride `p` and `d` with row stride
// `dp`. The results are written to `*min` and `*max`.
void vpx_minmax_8x8_msa(const uint8_t *s, int p, const uint8_t *d, int dp,
                        int *min, int *max) {
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
  v16u8 adiff0, adiff1, adiff2, adiff3;
  v16u8 lo, lo_tmp;
  v16u8 hi, hi_tmp;

  // Fetch eight rows from each input.
  // NOTE(review): the destinations are full v16u8 vectors, so each row load
  // presumably reads 16 bytes even though the block is 8 wide - confirm that
  // callers always provide readable memory past the 8th column.
  LD_UB8(s, p, src0, src1, src2, src3, src4, src5, src6, src7);
  LD_UB8(d, dp, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);

  // Fold each pair of rows into a single vector (even doublewords of both),
  // leaving the whole block in four vectors per input.
  PCKEV_D4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0, src1,
              src2, src3);
  PCKEV_D4_UB(ref1, ref0, ref3, ref2, ref5, ref4, ref7, ref6, ref0, ref1,
              ref2, ref3);

  // Per-byte absolute differences |s - d|.
  adiff0 = __msa_asub_u_b(src0, ref0);
  adiff1 = __msa_asub_u_b(src1, ref1);
  adiff2 = __msa_asub_u_b(src2, ref2);
  adiff3 = __msa_asub_u_b(src3, ref3);

  // Minimum: first reduce the four difference vectors lane-wise to one, then
  // reduce across lanes. Each slide by 8, 4, 2 and 1 bytes lines the upper
  // half of the still-relevant lanes up with the lower half, so lane 0 ends
  // up holding the minimum over all lanes.
  lo = __msa_min_u_b(adiff0, adiff1);
  lo_tmp = __msa_min_u_b(adiff2, adiff3);
  lo = __msa_min_u_b(lo, lo_tmp);
  lo_tmp = (v16u8)__msa_sldi_b((v16i8)lo_tmp, (v16i8)lo, 8);
  lo = __msa_min_u_b(lo, lo_tmp);
  lo_tmp = (v16u8)__msa_sldi_b((v16i8)lo_tmp, (v16i8)lo, 4);
  lo = __msa_min_u_b(lo, lo_tmp);
  lo_tmp = (v16u8)__msa_sldi_b((v16i8)lo_tmp, (v16i8)lo, 2);
  lo = __msa_min_u_b(lo, lo_tmp);
  lo_tmp = (v16u8)__msa_sldi_b((v16i8)lo_tmp, (v16i8)lo, 1);
  lo = __msa_min_u_b(lo, lo_tmp);

  // Maximum: identical reduction scheme using the unsigned max.
  hi = __msa_max_u_b(adiff0, adiff1);
  hi_tmp = __msa_max_u_b(adiff2, adiff3);
  hi = __msa_max_u_b(hi, hi_tmp);
  hi_tmp = (v16u8)__msa_sldi_b((v16i8)hi_tmp, (v16i8)hi, 8);
  hi = __msa_max_u_b(hi, hi_tmp);
  hi_tmp = (v16u8)__msa_sldi_b((v16i8)hi_tmp, (v16i8)hi, 4);
  hi = __msa_max_u_b(hi, hi_tmp);
  hi_tmp = (v16u8)__msa_sldi_b((v16i8)hi_tmp, (v16i8)hi, 2);
  hi = __msa_max_u_b(hi, hi_tmp);
  hi_tmp = (v16u8)__msa_sldi_b((v16i8)hi_tmp, (v16i8)hi, 1);
  hi = __msa_max_u_b(hi, hi_tmp);

  // Lane 0 carries the fully reduced value.
  *min = lo[0];
  *max = hi[0];
}

View File

@ -883,7 +883,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
specialize qw/vpx_avg_4x4 sse2 neon msa/;
add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vpx_minmax_8x8 sse2 neon/;
specialize qw/vpx_minmax_8x8 sse2 neon msa/;
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64";