Merge "vp9_reconintra_neon: add DC 32x32 predictors"
This commit is contained in:
commit
e77f859d72
@ -362,9 +362,12 @@ INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
|
|||||||
#endif // HAVE_SSSE3
|
#endif // HAVE_SSSE3
|
||||||
|
|
||||||
#if HAVE_NEON
|
#if HAVE_NEON
|
||||||
INTRA_PRED_TEST(NEON, TestIntraPred32, NULL, NULL, NULL, NULL,
|
INTRA_PRED_TEST(NEON, TestIntraPred32, vp9_dc_predictor_32x32_neon,
|
||||||
vp9_v_predictor_32x32_neon, vp9_h_predictor_32x32_neon, NULL,
|
vp9_dc_left_predictor_32x32_neon,
|
||||||
NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_32x32_neon)
|
vp9_dc_top_predictor_32x32_neon,
|
||||||
|
vp9_dc_128_predictor_32x32_neon, vp9_v_predictor_32x32_neon,
|
||||||
|
vp9_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
|
vp9_tm_predictor_32x32_neon)
|
||||||
#endif // HAVE_NEON
|
#endif // HAVE_NEON
|
||||||
|
|
||||||
#if HAVE_MSA
|
#if HAVE_MSA
|
||||||
|
@ -161,6 +161,89 @@ void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
|
|||||||
dc_16x16(dst, stride, NULL, NULL, 0, 0);
|
dc_16x16(dst, stride, NULL, NULL, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// DC 32x32
|
||||||
|
|
||||||
|
// 'do_above' and 'do_left' facilitate branch removal when inlined.
|
||||||
|
static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride,
|
||||||
|
const uint8_t *above, const uint8_t *left,
|
||||||
|
int do_above, int do_left) {
|
||||||
|
uint16x8_t sum_top;
|
||||||
|
uint16x8_t sum_left;
|
||||||
|
uint8x8_t dc0;
|
||||||
|
|
||||||
|
if (do_above) {
|
||||||
|
const uint8x16_t A0 = vld1q_u8(above); // top row
|
||||||
|
const uint8x16_t A1 = vld1q_u8(above + 16);
|
||||||
|
const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top
|
||||||
|
const uint16x8_t p1 = vpaddlq_u8(A1);
|
||||||
|
const uint16x8_t p2 = vaddq_u16(p0, p1);
|
||||||
|
const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
|
||||||
|
const uint16x4_t p4 = vpadd_u16(p3, p3);
|
||||||
|
const uint16x4_t p5 = vpadd_u16(p4, p4);
|
||||||
|
sum_top = vcombine_u16(p5, p5);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (do_left) {
|
||||||
|
const uint8x16_t L0 = vld1q_u8(left); // left row
|
||||||
|
const uint8x16_t L1 = vld1q_u8(left + 16);
|
||||||
|
const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left
|
||||||
|
const uint16x8_t p1 = vpaddlq_u8(L1);
|
||||||
|
const uint16x8_t p2 = vaddq_u16(p0, p1);
|
||||||
|
const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
|
||||||
|
const uint16x4_t p4 = vpadd_u16(p3, p3);
|
||||||
|
const uint16x4_t p5 = vpadd_u16(p4, p4);
|
||||||
|
sum_left = vcombine_u16(p5, p5);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (do_above && do_left) {
|
||||||
|
const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
|
||||||
|
dc0 = vrshrn_n_u16(sum, 6);
|
||||||
|
} else if (do_above) {
|
||||||
|
dc0 = vrshrn_n_u16(sum_top, 5);
|
||||||
|
} else if (do_left) {
|
||||||
|
dc0 = vrshrn_n_u16(sum_left, 5);
|
||||||
|
} else {
|
||||||
|
dc0 = vdup_n_u8(0x80);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 32; ++i) {
|
||||||
|
vst1q_u8(dst + i * stride, dc);
|
||||||
|
vst1q_u8(dst + i * stride + 16, dc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
|
||||||
|
const uint8_t *above, const uint8_t *left) {
|
||||||
|
dc_32x32(dst, stride, above, left, 1, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
|
||||||
|
const uint8_t *above,
|
||||||
|
const uint8_t *left) {
|
||||||
|
(void)above;
|
||||||
|
dc_32x32(dst, stride, NULL, left, 0, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
|
||||||
|
const uint8_t *above,
|
||||||
|
const uint8_t *left) {
|
||||||
|
(void)left;
|
||||||
|
dc_32x32(dst, stride, above, NULL, 1, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
|
||||||
|
const uint8_t *above,
|
||||||
|
const uint8_t *left) {
|
||||||
|
(void)above;
|
||||||
|
(void)left;
|
||||||
|
dc_32x32(dst, stride, NULL, NULL, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
#if !HAVE_NEON_ASM
|
#if !HAVE_NEON_ASM
|
||||||
|
|
||||||
void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
|
void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
|
||||||
|
@ -201,16 +201,16 @@ add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, co
|
|||||||
specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64";
|
specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||||
specialize qw/vp9_dc_predictor_32x32 msa/, "$sse2_x86inc";
|
specialize qw/vp9_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||||
specialize qw/vp9_dc_top_predictor_32x32 msa/, "$sse2_x86inc";
|
specialize qw/vp9_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||||
specialize qw/vp9_dc_left_predictor_32x32 msa/, "$sse2_x86inc";
|
specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||||
specialize qw/vp9_dc_128_predictor_32x32 msa/, "$sse2_x86inc";
|
specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
|
||||||
|
|
||||||
#
|
#
|
||||||
# Loopfilter
|
# Loopfilter
|
||||||
|
Loading…
Reference in New Issue
Block a user