diff --git a/test/sad_test.cc b/test/sad_test.cc index 9555a9ab5..15667be2b 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc @@ -332,15 +332,31 @@ INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests)); #if CONFIG_VP9_ENCODER const sad_n_by_n_by_4_fn_t sad_64x64x4d_c = vp9_sad64x64x4d_c; +const sad_n_by_n_by_4_fn_t sad_64x32x4d_c = vp9_sad64x32x4d_c; +const sad_n_by_n_by_4_fn_t sad_32x64x4d_c = vp9_sad32x64x4d_c; const sad_n_by_n_by_4_fn_t sad_32x32x4d_c = vp9_sad32x32x4d_c; +const sad_n_by_n_by_4_fn_t sad_32x16x4d_c = vp9_sad32x16x4d_c; +const sad_n_by_n_by_4_fn_t sad_16x32x4d_c = vp9_sad16x32x4d_c; const sad_n_by_n_by_4_fn_t sad_16x16x4d_c = vp9_sad16x16x4d_c; +const sad_n_by_n_by_4_fn_t sad_16x8x4d_c = vp9_sad16x8x4d_c; +const sad_n_by_n_by_4_fn_t sad_8x16x4d_c = vp9_sad8x16x4d_c; const sad_n_by_n_by_4_fn_t sad_8x8x4d_c = vp9_sad8x8x4d_c; +const sad_n_by_n_by_4_fn_t sad_8x4x4d_c = vp9_sad8x4x4d_c; +const sad_n_by_n_by_4_fn_t sad_4x8x4d_c = vp9_sad4x8x4d_c; const sad_n_by_n_by_4_fn_t sad_4x4x4d_c = vp9_sad4x4x4d_c; INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values( make_tuple(64, 64, sad_64x64x4d_c), + make_tuple(64, 32, sad_64x32x4d_c), + make_tuple(32, 64, sad_32x64x4d_c), make_tuple(32, 32, sad_32x32x4d_c), + make_tuple(32, 16, sad_32x16x4d_c), + make_tuple(16, 32, sad_16x32x4d_c), make_tuple(16, 16, sad_16x16x4d_c), + make_tuple(16, 8, sad_16x8x4d_c), + make_tuple(8, 16, sad_8x16x4d_c), make_tuple(8, 8, sad_8x8x4d_c), + make_tuple(8, 4, sad_8x4x4d_c), + make_tuple(4, 8, sad_4x8x4d_c), make_tuple(4, 4, sad_4x4x4d_c))); #endif @@ -407,8 +423,10 @@ const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse; INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values( make_tuple(4, 4, sad_4x4_sse_vp9))); +const sad_n_by_n_by_4_fn_t sad_4x8x4d_sse = vp9_sad4x8x4d_sse; const sad_n_by_n_by_4_fn_t sad_4x4x4d_sse = vp9_sad4x4x4d_sse; INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values( + make_tuple(4, 8, sad_4x8x4d_sse), make_tuple(4, 4, 
sad_4x4x4d_sse))); #endif #endif @@ -450,18 +468,28 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); #if CONFIG_VP9_ENCODER const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2; +const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2; +const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2; const sad_n_by_n_by_4_fn_t sad_32x32x4d_sse2 = vp9_sad32x32x4d_sse2; +const sad_n_by_n_by_4_fn_t sad_32x16x4d_sse2 = vp9_sad32x16x4d_sse2; +const sad_n_by_n_by_4_fn_t sad_16x32x4d_sse2 = vp9_sad16x32x4d_sse2; const sad_n_by_n_by_4_fn_t sad_16x16x4d_sse2 = vp9_sad16x16x4d_sse2; const sad_n_by_n_by_4_fn_t sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2; const sad_n_by_n_by_4_fn_t sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2; const sad_n_by_n_by_4_fn_t sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2; +const sad_n_by_n_by_4_fn_t sad_8x4x4d_sse2 = vp9_sad8x4x4d_sse2; INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values( make_tuple(64, 64, sad_64x64x4d_sse2), + make_tuple(64, 32, sad_64x32x4d_sse2), + make_tuple(32, 64, sad_32x64x4d_sse2), make_tuple(32, 32, sad_32x32x4d_sse2), + make_tuple(32, 16, sad_32x16x4d_sse2), + make_tuple(16, 32, sad_16x32x4d_sse2), make_tuple(16, 16, sad_16x16x4d_sse2), make_tuple(16, 8, sad_16x8x4d_sse2), make_tuple(8, 16, sad_8x16x4d_sse2), - make_tuple(8, 8, sad_8x8x4d_sse2))); + make_tuple(8, 8, sad_8x8x4d_sse2), + make_tuple(8, 4, sad_8x4x4d_sse2))); #endif #endif diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index f281e08e9..bca24b7eb 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -505,13 +505,14 @@ specialize vp9_sad8x8x4d sse2 -# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form +# TODO(jingning): need to convert the 4x8 function into sse2 form prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x4x4d +specialize vp9_sad8x4x4d sse2 prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, 
int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x8x4d +specialize vp9_sad4x8x4d sse prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" specialize vp9_sad4x4x4d sse + prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse" specialize vp9_sub_pixel_mse16x16 sse2 mmx diff --git a/vp9/encoder/x86/vp9_sad4d_sse2.asm b/vp9/encoder/x86/vp9_sad4d_sse2.asm index 25dd064e1..b4936281f 100644 --- a/vp9/encoder/x86/vp9_sad4d_sse2.asm +++ b/vp9/encoder/x86/vp9_sad4d_sse2.asm @@ -224,6 +224,8 @@ SADNXN4D 16, 16 SADNXN4D 16, 8 SADNXN4D 8, 16 SADNXN4D 8, 8 +SADNXN4D 8, 4 INIT_MMX sse +SADNXN4D 4, 8 SADNXN4D 4, 4