Merge "mips msa vp9 avg subpel variance optimization"
commit 4f7e7c4d49
@@ -2095,6 +2095,47 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(5, 6, subpel_variance32x64_msa, 0),
                       make_tuple(6, 5, subpel_variance64x32_msa, 0),
                       make_tuple(6, 6, subpel_variance64x64_msa, 0)));
+const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_msa =
+    vp9_sub_pixel_avg_variance4x4_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_msa =
+    vp9_sub_pixel_avg_variance4x8_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x4_msa =
+    vp9_sub_pixel_avg_variance8x4_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x8_msa =
+    vp9_sub_pixel_avg_variance8x8_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x16_msa =
+    vp9_sub_pixel_avg_variance8x16_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x8_msa =
+    vp9_sub_pixel_avg_variance16x8_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x16_msa =
+    vp9_sub_pixel_avg_variance16x16_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x32_msa =
+    vp9_sub_pixel_avg_variance16x32_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x16_msa =
+    vp9_sub_pixel_avg_variance32x16_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_msa =
+    vp9_sub_pixel_avg_variance32x32_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x64_msa =
+    vp9_sub_pixel_avg_variance32x64_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x32_msa =
+    vp9_sub_pixel_avg_variance64x32_msa;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_msa =
+    vp9_sub_pixel_avg_variance64x64_msa;
+INSTANTIATE_TEST_CASE_P(
+    MSA, VP9SubpelAvgVarianceTest,
+    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_msa, 0),
+                      make_tuple(2, 3, subpel_avg_variance4x8_msa, 0),
+                      make_tuple(3, 2, subpel_avg_variance8x4_msa, 0),
+                      make_tuple(3, 3, subpel_avg_variance8x8_msa, 0),
+                      make_tuple(3, 4, subpel_avg_variance8x16_msa, 0),
+                      make_tuple(4, 3, subpel_avg_variance16x8_msa, 0),
+                      make_tuple(4, 4, subpel_avg_variance16x16_msa, 0),
+                      make_tuple(4, 5, subpel_avg_variance16x32_msa, 0),
+                      make_tuple(5, 4, subpel_avg_variance32x16_msa, 0),
+                      make_tuple(5, 5, subpel_avg_variance32x32_msa, 0),
+                      make_tuple(5, 6, subpel_avg_variance32x64_msa, 0),
+                      make_tuple(6, 5, subpel_avg_variance64x32_msa, 0),
+                      make_tuple(6, 6, subpel_avg_variance64x64_msa, 0)));
 #endif  // CONFIG_VP9_ENCODER
 #endif  // HAVE_MSA
 }  // namespace
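Note on the test hunk above: the first two make_tuple arguments are log2 block width and log2 block height, so make_tuple(6, 6, subpel_avg_variance64x64_msa, 0) exercises the 64x64 kernel, and the trailing 0 selects the 8-bit path. The "avg" variants differ from the plain sub-pel variance functions in that the sub-pel-filtered predictor is first averaged with a second predictor before SSE and variance are computed. A rough, whole-pel-only C sketch of that reference behaviour follows; the real kernels additionally apply a bilinear filter controlled by xoffset/yoffset, and every name below is illustrative rather than taken from this patch.

#include <stdint.h>

/* Whole-pel reference sketch of "avg" sub-pel variance: average the
 * predictor with second_pred (rounding up), then return the variance of
 * the source block against that averaged predictor and write the SSE.
 * w and h stand in for the fixed block size of each specialized kernel. */
static unsigned int avg_variance_ref(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     const uint8_t *second_pred,
                                     int w, int h, unsigned int *sse) {
  int64_t sum = 0;
  uint64_t sse64 = 0;
  int i, j;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      /* Rounded average of the two predictors, as comp-avg prediction does. */
      const int avg = (ref[i * ref_stride + j] + second_pred[i * w + j] + 1) >> 1;
      const int diff = src[i * src_stride + j] - avg;
      sum += diff;
      sse64 += (uint64_t)(diff * diff);
    }
  }
  *sse = (unsigned int)sse64;
  /* variance = SSE - (sum^2 / number of pixels) */
  return (unsigned int)(sse64 - (uint64_t)((sum * sum) / (w * h)));
}

The MSA kernels under test (vp9_sub_pixel_avg_variance4x4_msa through vp9_sub_pixel_avg_variance64x64_msa) implement the same contract for their fixed block sizes, plus the sub-pel interpolation step, and the harness checks them against the C reference implementation.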
@@ -801,81 +801,81 @@ add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr,
 specialize qw/vp9_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance16x16 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance8x8 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 # TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
 add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
 #vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
 specialize qw/vp9_avg_8x8 sse2 neon msa/;
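In the RTCD hunk above (vp9/common/vp9_rtcd_defs.pl), each changed specialize line simply appends msa to the list of optimized flavours registered for the averaging sub-pel variance kernels; the plain sub-pel variance kernels already carried msa. At build time these lists are expanded into the generated vp9_rtcd.h dispatch header, and for an MSA-enabled MIPS build there is typically no runtime CPU dispatch, so the generic name plausibly resolves to the MSA kernel at compile time. The excerpt below is a hypothetical sketch of that generated mapping, not part of this diff; the actual text is produced by the build system.

#include <stdint.h>

/* Hypothetical excerpt of the generated vp9_rtcd.h for an MSA-enabled build. */
unsigned int vp9_sub_pixel_avg_variance64x64_c(
    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
    const uint8_t *second_pred);
unsigned int vp9_sub_pixel_avg_variance64x64_msa(
    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
    const uint8_t *second_pred);
/* With msa in the specialize list the generic name maps to the MSA kernel;
 * without it, it would fall back to the C reference implementation. */
#define vp9_sub_pixel_avg_variance64x64 vp9_sub_pixel_avg_variance64x64_msa

Encoder call sites keep using the generic vp9_sub_pixel_avg_variance* names and pick up the MSA kernels transparently.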
File diff suppressed because it is too large