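Update vpx_idct4x4_16_add_neon() to pass SingleExtremeCoeff test

Stage 1 of the 4x4 inverse transform previously added/subtracted input[0] and
input[2] in 16 bits and then multiplied the result by cospi_16_64. It now
multiplies each input by cospi_16_64 first and adds/subtracts the 32-bit
products, in both the assembly and the intrinsics versions (presumably so the
intermediate sum can no longer wrap in 16 bits for extreme coefficients).
The NEON special case in MinSupportedCoeff() is removed accordingly.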
Change-Id: Icc4ead05506797d12bf134e8790443676fef5c10
This commit is contained in:
@@ -65,16 +65,11 @@ int16_t MaxSupportedCoeff(InvTxfmFunc a) {
|
||||
|
||||
// Returns the lower bound on coefficient values for the inverse transform `a`.
// The default is std::numeric_limits<int16_t>::min(); specific functions get a
// narrower bound when the build-configuration guards below are enabled.
// NOTE(review): this listing is a rendered diff. Removed and added lines appear
// together without +/- markers, so the preprocessor conditionals do not balance
// as shown here; confirm against the actual file before relying on it.
int16_t MinSupportedCoeff(InvTxfmFunc a) {
|
||||
(void)a;
|
||||
// NOTE(review): the next two guards appear to be the pre-change form; they are
// folded into the single combined condition that follows them.
#if !CONFIG_EMULATE_HARDWARE
|
||||
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
|
||||
!CONFIG_EMULATE_HARDWARE
|
||||
// The two SSSE3 8x8 functions are given a lower bound of -23625 + 1.
if (a == vpx_idct8x8_64_add_ssse3 || a == vpx_idct8x8_12_add_ssse3) {
|
||||
return -23625 + 1;
|
||||
}
|
||||
// NOTE(review): this NEON branch excluded only the single most negative int16_t
// value for vpx_idct4x4_16_add_neon; it appears to be the part this change
// removes.
#elif HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
|
||||
if (a == vpx_idct4x4_16_add_neon) {
|
||||
return std::numeric_limits<int16_t>::min() + 1;
|
||||
}
|
||||
#endif
|
||||
#endif // !CONFIG_EMULATE_HARDWARE
|
||||
// Default: the full negative int16_t range is supported.
return std::numeric_limits<int16_t>::min();
|
||||
}
|
||||
|
||||
@@ -72,16 +72,15 @@
|
||||
; do the transform on transposed rows
|
||||
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
|
||||
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64;
|
||||
; (input[0] - input[2]) * cospi_16_64;
|
||||
vmull.s16 q13, d23, d21
|
||||
vmull.s16 q14, d24, d21
|
||||
vmull.s16 q8, d16, d21
|
||||
vmull.s16 q14, d18, d21
|
||||
vadd.s32 q13, q8, q14
|
||||
vsub.s32 q14, q8, q14
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
static INLINE void idct4x4_16_kernel(const int16x4_t cospis, int16x8_t *a0,
|
||||
int16x8_t *a1) {
|
||||
int16x4_t b0, b1, b2, b3, b4, b5;
|
||||
int16x4_t b0, b1, b2, b3;
|
||||
int32x4_t c0, c1, c2, c3;
|
||||
int16x8_t d0, d1;
|
||||
|
||||
@@ -27,10 +27,10 @@ static INLINE void idct4x4_16_kernel(const int16x4_t cospis, int16x8_t *a0,
|
||||
b1 = vget_high_s16(*a0);
|
||||
b2 = vget_low_s16(*a1);
|
||||
b3 = vget_high_s16(*a1);
|
||||
b4 = vadd_s16(b0, b1);
|
||||
b5 = vsub_s16(b0, b1);
|
||||
c0 = vmull_lane_s16(b4, cospis, 2);
|
||||
c1 = vmull_lane_s16(b5, cospis, 2);
|
||||
c0 = vmull_lane_s16(b0, cospis, 2);
|
||||
c2 = vmull_lane_s16(b1, cospis, 2);
|
||||
c1 = vsubq_s32(c0, c2);
|
||||
c0 = vaddq_s32(c0, c2);
|
||||
c2 = vmull_lane_s16(b2, cospis, 3);
|
||||
c3 = vmull_lane_s16(b2, cospis, 1);
|
||||
c2 = vmlsl_lane_s16(c2, b3, cospis, 1);
|
||||
|
||||
Reference in New Issue
Block a user