vpxdsp: [x86] add highbd_dc_128_predictor functions
C vs SSE2 speed gains:
_4x4 : ~7.64x
_8x8 : ~16.60x
_16x16 : ~8.15x
_32x32 : ~5.05x

BUG=webm:1411

Change-Id: If165d419711cfda901bd428a05ca1560a009e62e
This commit is contained in:
parent
d49a1a5329
commit
bc4bcca3fd
@ -483,7 +483,8 @@ HIGHBD_INTRA_PRED_TEST(
|
||||
HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred4,
|
||||
vpx_highbd_dc_predictor_4x4_sse2,
|
||||
vpx_highbd_dc_left_predictor_4x4_sse2,
|
||||
vpx_highbd_dc_top_predictor_4x4_sse2, NULL,
|
||||
vpx_highbd_dc_top_predictor_4x4_sse2,
|
||||
vpx_highbd_dc_128_predictor_4x4_sse2,
|
||||
vpx_highbd_v_predictor_4x4_sse2,
|
||||
vpx_highbd_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, vpx_highbd_tm_predictor_4x4_c)
|
||||
@ -491,7 +492,8 @@ HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred4,
|
||||
HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
|
||||
vpx_highbd_dc_predictor_8x8_sse2,
|
||||
vpx_highbd_dc_left_predictor_8x8_sse2,
|
||||
vpx_highbd_dc_top_predictor_8x8_sse2, NULL,
|
||||
vpx_highbd_dc_top_predictor_8x8_sse2,
|
||||
vpx_highbd_dc_128_predictor_8x8_sse2,
|
||||
vpx_highbd_v_predictor_8x8_sse2,
|
||||
vpx_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2)
|
||||
@ -499,7 +501,8 @@ HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
|
||||
HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
|
||||
vpx_highbd_dc_predictor_16x16_sse2,
|
||||
vpx_highbd_dc_left_predictor_16x16_sse2,
|
||||
vpx_highbd_dc_top_predictor_16x16_sse2, NULL,
|
||||
vpx_highbd_dc_top_predictor_16x16_sse2,
|
||||
vpx_highbd_dc_128_predictor_16x16_sse2,
|
||||
vpx_highbd_v_predictor_16x16_sse2,
|
||||
vpx_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, vpx_highbd_tm_predictor_16x16_sse2)
|
||||
@ -507,7 +510,8 @@ HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
|
||||
HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred32,
|
||||
vpx_highbd_dc_predictor_32x32_sse2,
|
||||
vpx_highbd_dc_left_predictor_32x32_sse2,
|
||||
vpx_highbd_dc_top_predictor_32x32_sse2, NULL,
|
||||
vpx_highbd_dc_top_predictor_32x32_sse2,
|
||||
vpx_highbd_dc_128_predictor_32x32_sse2,
|
||||
vpx_highbd_v_predictor_32x32_sse2,
|
||||
vpx_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, vpx_highbd_tm_predictor_32x32_sse2)
|
||||
|
@ -471,6 +471,14 @@ TEST_P(VP9HighbdIntraPredTest, HighbdIntraPredTests) {
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2_TO_C_8, VP9HighbdIntraPredTest,
|
||||
::testing::Values(
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_128_predictor_4x4_c, 4, 8),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_128_predictor_8x8_c, 8, 8),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
|
||||
&vpx_highbd_dc_128_predictor_16x16_c, 16, 8),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
|
||||
&vpx_highbd_dc_128_predictor_32x32_c, 32, 8),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_left_predictor_4x4_c, 4, 8),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
|
||||
@ -523,6 +531,14 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2_TO_C_10, VP9HighbdIntraPredTest,
|
||||
::testing::Values(
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_128_predictor_4x4_c, 4, 10),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_128_predictor_8x8_c, 8, 10),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
|
||||
&vpx_highbd_dc_128_predictor_16x16_c, 16, 10),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
|
||||
&vpx_highbd_dc_128_predictor_32x32_c, 32, 10),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_left_predictor_4x4_c, 4, 10),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
|
||||
@ -575,6 +591,14 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2_TO_C_12, VP9HighbdIntraPredTest,
|
||||
::testing::Values(
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_128_predictor_4x4_c, 4, 12),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_128_predictor_8x8_c, 8, 12),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
|
||||
&vpx_highbd_dc_128_predictor_16x16_c, 16, 12),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
|
||||
&vpx_highbd_dc_128_predictor_32x32_c, 32, 12),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_left_predictor_4x4_c, 4, 12),
|
||||
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
|
||||
|
@ -220,7 +220,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_highbd_dc_left_predictor_4x4 neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_dc_128_predictor_4x4 neon/;
|
||||
specialize qw/vpx_highbd_dc_128_predictor_4x4 neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
|
||||
@ -255,7 +255,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_highbd_dc_left_predictor_8x8 neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_dc_128_predictor_8x8 neon/;
|
||||
specialize qw/vpx_highbd_dc_128_predictor_8x8 neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
|
||||
@ -290,7 +290,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_highbd_dc_left_predictor_16x16 neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_dc_128_predictor_16x16 neon/;
|
||||
specialize qw/vpx_highbd_dc_128_predictor_16x16 neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
|
||||
@ -325,7 +325,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_highbd_dc_left_predictor_32x32 neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_dc_128_predictor_32x32 neon/;
|
||||
specialize qw/vpx_highbd_dc_128_predictor_32x32 neon sse2/;
|
||||
} # CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#
|
||||
|
@ -199,6 +199,16 @@ void vpx_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
|
||||
dc_store_4x4(dst, stride, &dc);
|
||||
}
|
||||
|
||||
// High-bitdepth DC_128 predictor, 4x4, SSE2.
// The predicted value is the constant 1 << (bd - 1); neither |above| nor
// |left| is read. The constant is replicated across the low four 16-bit lanes
// and handed to dc_store_4x4() together with |dst| and |stride|.
void vpx_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
                                          const uint16_t *above,
                                          const uint16_t *left, int bd) {
  const int half_range = 1 << (bd - 1);
  const __m128i half_vec = _mm_cvtsi32_si128(half_range);
  // Shuffle control 0x0 selects word 0 for each of the four low words.
  const __m128i half_lo4 = _mm_shufflelo_epi16(half_vec, 0x0);
  (void)left;
  (void)above;
  dc_store_4x4(dst, stride, &half_lo4);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// DC 8x8
|
||||
|
||||
@ -243,6 +253,16 @@ void vpx_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
|
||||
dc_store_8x8(dst, stride, &dc);
|
||||
}
|
||||
|
||||
// High-bitdepth DC_128 predictor, 8x8, SSE2.
// The predicted value is the constant 1 << (bd - 1); neither |above| nor
// |left| is read. The constant is replicated across the low four 16-bit lanes
// and handed to dc_store_8x8() together with |dst| and |stride|.
void vpx_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
                                          const uint16_t *above,
                                          const uint16_t *left, int bd) {
  const int half_range = 1 << (bd - 1);
  const __m128i half_vec = _mm_cvtsi32_si128(half_range);
  // Shuffle control 0x0 selects word 0 for each of the four low words.
  const __m128i half_lo4 = _mm_shufflelo_epi16(half_vec, 0x0);
  (void)left;
  (void)above;
  dc_store_8x8(dst, stride, &half_lo4);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// DC 16x16
|
||||
|
||||
@ -285,6 +305,16 @@ void vpx_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
|
||||
dc_store_16x16(dst, stride, &dc);
|
||||
}
|
||||
|
||||
// High-bitdepth DC_128 predictor, 16x16, SSE2.
// The predicted value is the constant 1 << (bd - 1); neither |above| nor
// |left| is read. The constant is replicated across the low four 16-bit lanes
// and handed to dc_store_16x16() together with |dst| and |stride|.
void vpx_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd) {
  const int half_range = 1 << (bd - 1);
  const __m128i half_vec = _mm_cvtsi32_si128(half_range);
  // Shuffle control 0x0 selects word 0 for each of the four low words.
  const __m128i half_lo4 = _mm_shufflelo_epi16(half_vec, 0x0);
  (void)left;
  (void)above;
  dc_store_16x16(dst, stride, &half_lo4);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// DC 32x32
|
||||
|
||||
@ -331,3 +361,13 @@ void vpx_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
|
||||
(void)bd;
|
||||
dc_store_32x32(dst, stride, &dc);
|
||||
}
|
||||
|
||||
// High-bitdepth DC_128 predictor, 32x32, SSE2.
// The predicted value is the constant 1 << (bd - 1); neither |above| nor
// |left| is read. The constant is replicated across the low four 16-bit lanes
// and handed to dc_store_32x32() together with |dst| and |stride|.
void vpx_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd) {
  const int half_range = 1 << (bd - 1);
  const __m128i half_vec = _mm_cvtsi32_si128(half_range);
  // Shuffle control 0x0 selects word 0 for each of the four low words.
  const __m128i half_lo4 = _mm_shufflelo_epi16(half_vec, 0x0);
  (void)left;
  (void)above;
  dc_store_32x32(dst, stride, &half_lo4);
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user