Use shifted value for sinpi8sqrt2
The value 35468 does not fit in int16_t, so it changes sign when stored there. The compiler reports:

    implicit conversion from 'int' to 'int16_t' (aka 'short') changes value from 35468 to -30068

Using the resulting negative constant requires adding back the original value to compensate. Shifting the value right by one keeps it positive and, because vqdmulh doubles its result, also saves a post-vqdmulh shift. This technique is used in webp and idct_dequant_full_2x_neon.

BUG=b/28027557

Change-Id: I0c5ce09bea170fe08061856c2af6f841a557e0c3
This commit is contained in:
parent
ada850786c
commit
ab0e7a237a
@ -115,6 +115,10 @@ TEST_P(IDCTTest, TestWithData) {
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
|
||||
#if HAVE_NEON
|
||||
INSTANTIATE_TEST_CASE_P(NEON, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_neon));
|
||||
#endif
|
||||
#if HAVE_MMX
|
||||
INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_mmx));
|
||||
|
@ -11,7 +11,11 @@
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
// 35468 exceeds INT16_MAX and gets converted to a negative number. Because of
|
||||
// the way it is used in vqdmulh, where the result is doubled, it can be divided
|
||||
// by 2 beforehand. This saves compensating for the negative value as well as
|
||||
// shifting the result.
|
||||
static const int16_t sinpi8sqrt2 = 35468 >> 1;
|
||||
|
||||
void vp8_dequant_idct_add_neon(int16_t *input, int16_t *dq, unsigned char *dst,
|
||||
int stride) {
|
||||
@ -60,10 +64,8 @@ void vp8_dequant_idct_add_neon(int16_t *input, int16_t *dq, unsigned char *dst,
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
@ -90,10 +92,8 @@ void vp8_dequant_idct_add_neon(int16_t *input, int16_t *dq, unsigned char *dst,
|
||||
d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
|
@ -11,7 +11,11 @@
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
// 35468 exceeds INT16_MAX and gets converted to a negative number. Because of
|
||||
// the way it is used in vqdmulh, where the result is doubled, it can be divided
|
||||
// by 2 beforehand. This saves compensating for the negative value as well as
|
||||
// shifting the result.
|
||||
static const int16_t sinpi8sqrt2 = 35468 >> 1;
|
||||
|
||||
void vp8_short_idct4x4llm_neon(int16_t *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
@ -40,10 +44,8 @@ void vp8_short_idct4x4llm_neon(int16_t *input, unsigned char *pred_ptr,
|
||||
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
|
||||
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
|
||||
|
||||
q3s16 = vshrq_n_s16(q3s16, 1);
|
||||
q4s16 = vshrq_n_s16(q4s16, 1);
|
||||
|
||||
q3s16 = vqaddq_s16(q3s16, q2s16);
|
||||
q4s16 = vqaddq_s16(q4s16, q2s16);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
|
||||
@ -71,10 +73,8 @@ void vp8_short_idct4x4llm_neon(int16_t *input, unsigned char *pred_ptr,
|
||||
d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1
|
||||
d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1
|
||||
|
||||
q3s16 = vshrq_n_s16(q3s16, 1);
|
||||
q4s16 = vshrq_n_s16(q4s16, 1);
|
||||
|
||||
q3s16 = vqaddq_s16(q3s16, q2s16);
|
||||
q4s16 = vqaddq_s16(q4s16, q2s16);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1
|
||||
|
Loading…
Reference in New Issue
Block a user