vpx_dsp: Loongson: optimize vpx_varianceWxH_c, vpx_sub_pixel_varianceWxH_c and vpx_sub_pixel_avg_varianceWxH_c with MMI.

Change-Id: Ia576a721df6312329b599c31cfe1fb1267a9f174
This commit is contained in:
Shiyou Yin 2017-08-24 23:11:58 +08:00
parent d080c92524
commit 9e4647c7ab
4 changed files with 1249 additions and 43 deletions

View File

@ -1547,5 +1547,55 @@ INSTANTIATE_TEST_CASE_P(MMI, VpxMseTest,
MseParams(4, 3, &vpx_mse16x8_mmi),
MseParams(3, 4, &vpx_mse8x16_mmi),
MseParams(3, 3, &vpx_mse8x8_mmi)));
// Registers the MMI whole-pixel variance kernels with VpxVarianceTest, one
// entry per block size from 64x64 down to 4x4. In every entry the first two
// arguments equal log2 of the width and height that appear in the function
// name (e.g. 6, 5 for vpx_variance64x32_mmi).
INSTANTIATE_TEST_CASE_P(
MMI, VpxVarianceTest,
::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_mmi),
VarianceParams(6, 5, &vpx_variance64x32_mmi),
VarianceParams(5, 6, &vpx_variance32x64_mmi),
VarianceParams(5, 5, &vpx_variance32x32_mmi),
VarianceParams(5, 4, &vpx_variance32x16_mmi),
VarianceParams(4, 5, &vpx_variance16x32_mmi),
VarianceParams(4, 4, &vpx_variance16x16_mmi),
VarianceParams(4, 3, &vpx_variance16x8_mmi),
VarianceParams(3, 4, &vpx_variance8x16_mmi),
VarianceParams(3, 3, &vpx_variance8x8_mmi),
VarianceParams(3, 2, &vpx_variance8x4_mmi),
VarianceParams(2, 3, &vpx_variance4x8_mmi),
VarianceParams(2, 2, &vpx_variance4x4_mmi)));
// Registers the MMI sub-pixel variance kernels with VpxSubpelVarianceTest,
// covering the same thirteen block sizes (64x64 .. 4x4). The first two
// arguments equal log2 of the width and height in the function name.
// NOTE(review): the trailing 0 is presumably the bit-depth selector for the
// non-high-bitdepth path — confirm against SubpelVarianceParams.
INSTANTIATE_TEST_CASE_P(
MMI, VpxSubpelVarianceTest,
::testing::Values(
SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_mmi, 0),
SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_mmi, 0),
SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_mmi, 0),
SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_mmi, 0),
SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_mmi, 0),
SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_mmi, 0),
SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_mmi, 0),
SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_mmi, 0),
SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_mmi, 0),
SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_mmi, 0),
SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_mmi, 0),
SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_mmi, 0),
SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_mmi, 0)));
// Registers the MMI sub-pixel averaging variance kernels with
// VpxSubpelAvgVarianceTest for block sizes 64x64 down to 4x4. The first two
// arguments equal log2 of the width and height in the function name.
// NOTE(review): the trailing 0 is presumably the bit-depth selector for the
// non-high-bitdepth path — confirm against SubpelAvgVarianceParams.
INSTANTIATE_TEST_CASE_P(
MMI, VpxSubpelAvgVarianceTest,
::testing::Values(
SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_mmi, 0),
SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_mmi, 0),
SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_mmi, 0),
SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_mmi, 0),
SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_mmi, 0),
SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_mmi, 0),
SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_mmi, 0),
SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_mmi, 0),
SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_mmi, 0),
SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_mmi, 0),
SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_mmi, 0),
SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_mmi, 0),
SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_mmi, 0)));
#endif // HAVE_MMI
} // namespace

File diff suppressed because it is too large Load Diff

View File

@ -1053,43 +1053,43 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "
# Variance
#
add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance64x64 sse2 avx2 neon msa/;
specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance64x32 sse2 avx2 neon msa/;
specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance32x64 sse2 neon msa/;
specialize qw/vpx_variance32x64 sse2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance32x32 sse2 avx2 neon msa/;
specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance32x16 sse2 avx2 neon msa/;
specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance16x32 sse2 neon msa/;
specialize qw/vpx_variance16x32 sse2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance16x16 sse2 avx2 neon msa/;
specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance16x8 sse2 neon msa/;
specialize qw/vpx_variance16x8 sse2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance8x16 sse2 neon msa/;
specialize qw/vpx_variance8x16 sse2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance8x8 sse2 neon msa/;
specialize qw/vpx_variance8x8 sse2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance8x4 sse2 neon msa/;
specialize qw/vpx_variance8x4 sse2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance4x8 sse2 neon msa/;
specialize qw/vpx_variance4x8 sse2 neon msa mmi/;
add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance4x4 sse2 neon msa/;
specialize qw/vpx_variance4x4 sse2 neon msa mmi/;
#
# Specialty Variance
@ -1125,82 +1125,82 @@ add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred,
# Subpixel Variance
#
add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance64x32 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance64x32 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance32x64 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance32x64 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance32x16 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance32x16 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance16x32 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance16x32 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance16x16 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance16x16 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance16x8 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance16x8 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance8x16 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance8x16 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance8x8 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance8x8 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance8x4 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance8x4 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance4x8 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance4x8 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance4x4 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_variance4x4 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance64x64 neon avx2 msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance64x64 neon avx2 msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance64x32 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance64x32 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance32x64 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance32x64 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance32x32 neon avx2 msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance32x32 neon avx2 msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance32x16 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance32x16 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance16x32 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance16x32 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance16x16 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance16x16 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance16x8 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance16x8 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance8x16 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance8x16 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance8x8 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance8x8 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance8x4 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance8x4 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance4x8 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance4x8 neon msa mmi sse2 ssse3/;
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance4x4 neon msa sse2 ssse3/;
specialize qw/vpx_sub_pixel_avg_variance4x4 neon msa mmi sse2 ssse3/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";

View File

@ -40,10 +40,10 @@
"dsll " #reg1 ", " #reg2 ", " #shift " \n\t"
#define MMI_MTC1(reg, fp) \
"dmtc1 " #reg " " #fp " \n\t"
"dmtc1 " #reg ", " #fp " \n\t"
#define MMI_LI(reg, immediate) \
"dli " #reg " " #immediate " \n\t"
"dli " #reg ", " #immediate " \n\t"
#else
#define mips_reg int32_t
@ -69,10 +69,10 @@
"sll " #reg1 ", " #reg2 ", " #shift " \n\t"
#define MMI_MTC1(reg, fp) \
"mtc1 " #reg " " #fp " \n\t"
"mtc1 " #reg ", " #fp " \n\t"
#define MMI_LI(reg, immediate) \
"li " #reg " " #immediate " \n\t"
"li " #reg ", " #immediate " \n\t"
#endif /* HAVE_MIPS64 */