Merge "Add vp9_avg_4x4_neon and the unit test."

This commit is contained in:
Jacky Chen 2015-12-09 06:09:33 +00:00 committed by Gerrit Code Review
commit d9bba21306
3 changed files with 17 additions and 2 deletions

View File

@ -372,7 +372,10 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values( ::testing::Values(
make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon), make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon), make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon))); make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon),
make_tuple(16, 16, 0, 4, &vp9_avg_4x4_neon),
make_tuple(16, 16, 5, 4, &vp9_avg_4x4_neon),
make_tuple(32, 32, 15, 4, &vp9_avg_4x4_neon)));
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
NEON, IntProRowTest, ::testing::Values( NEON, IntProRowTest, ::testing::Values(

View File

@ -198,7 +198,7 @@ add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
specialize qw/vp9_avg_8x8 sse2 neon msa/; specialize qw/vp9_avg_8x8 sse2 neon msa/;
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p"; add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_avg_4x4 sse2 msa/; specialize qw/vp9_avg_4x4 sse2 neon msa/;
add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_minmax_8x8 sse2/; specialize qw/vp9_minmax_8x8 sse2/;

View File

@ -24,6 +24,18 @@ static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
return vget_lane_u32(c, 0); return vget_lane_u32(c, 0);
} }
// Packs four consecutive bytes starting at |row| into one 32-bit word using
// byte reads only, so |row| needs no particular alignment. The caller only
// sums the individual bytes, so the byte order inside the word is irrelevant.
static uint32_t avg_4x4_load_u8x4(const uint8_t *row) {
  return (uint32_t)row[0] | ((uint32_t)row[1] << 8) |
         ((uint32_t)row[2] << 16) | ((uint32_t)row[3] << 24);
}

// Returns the rounded average of the 16 bytes of the 4x4 block whose top-left
// byte is |s| and whose rows are |p| bytes apart.
//
// The rows are fetched with byte reads rather than through a
// (const uint32_t *) cast of |s|: casting a byte pointer that is not 4-byte
// aligned to uint32_t * is undefined behaviour in C, and a compiler may attach
// an alignment hint to the resulting load.
unsigned int vp9_avg_4x4_neon(const uint8_t *s, int p) {
  uint16x8_t v_sum;
  uint32x2_t v_s0 = vdup_n_u32(0);
  uint32x2_t v_s1 = vdup_n_u32(0);
  // Rows 0 and 1 share one 64-bit vector, rows 2 and 3 the other.
  v_s0 = vset_lane_u32(avg_4x4_load_u8x4(s), v_s0, 0);
  v_s0 = vset_lane_u32(avg_4x4_load_u8x4(s + p), v_s0, 1);
  v_s1 = vset_lane_u32(avg_4x4_load_u8x4(s + 2 * p), v_s1, 0);
  v_s1 = vset_lane_u32(avg_4x4_load_u8x4(s + 3 * p), v_s1, 1);
  // Widening add: each 16-bit lane holds the sum of two bytes (at most 510).
  v_sum = vaddl_u8(vreinterpret_u8_u32(v_s0), vreinterpret_u8_u32(v_s1));
  // horizontal_add_u16x8 (defined earlier in this file) is expected to return
  // the total of the eight lanes; adding 8 and shifting right by 4 divides
  // that total by 16 with rounding.
  return (horizontal_add_u16x8(v_sum) + 8) >> 4;
}
unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) { unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
uint8x8_t v_s0 = vld1_u8(s); uint8x8_t v_s0 = vld1_u8(s);
const uint8x8_t v_s1 = vld1_u8(s + p); const uint8x8_t v_s1 = vld1_u8(s + p);