Merge "Add vp9_avg_4x4_neon and the unit test."
This commit is contained in:
commit
d9bba21306
@ -372,7 +372,10 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
|
make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
|
||||||
make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
|
make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
|
||||||
make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));
|
make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon),
|
||||||
|
make_tuple(16, 16, 0, 4, &vp9_avg_4x4_neon),
|
||||||
|
make_tuple(16, 16, 5, 4, &vp9_avg_4x4_neon),
|
||||||
|
make_tuple(32, 32, 15, 4, &vp9_avg_4x4_neon)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
NEON, IntProRowTest, ::testing::Values(
|
NEON, IntProRowTest, ::testing::Values(
|
||||||
|
@ -198,7 +198,7 @@ add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
|
|||||||
specialize qw/vp9_avg_8x8 sse2 neon msa/;
|
specialize qw/vp9_avg_8x8 sse2 neon msa/;
|
||||||
|
|
||||||
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
|
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
|
||||||
specialize qw/vp9_avg_4x4 sse2 msa/;
|
specialize qw/vp9_avg_4x4 sse2 neon msa/;
|
||||||
|
|
||||||
add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
|
add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
|
||||||
specialize qw/vp9_minmax_8x8 sse2/;
|
specialize qw/vp9_minmax_8x8 sse2/;
|
||||||
|
@ -24,6 +24,18 @@ static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
|
|||||||
return vget_lane_u32(c, 0);
|
return vget_lane_u32(c, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned int vp9_avg_4x4_neon(const uint8_t *s, int p) {
|
||||||
|
uint16x8_t v_sum;
|
||||||
|
uint32x2_t v_s0 = vdup_n_u32(0);
|
||||||
|
uint32x2_t v_s1 = vdup_n_u32(0);
|
||||||
|
v_s0 = vld1_lane_u32((const uint32_t *)s, v_s0, 0);
|
||||||
|
v_s0 = vld1_lane_u32((const uint32_t *)(s + p), v_s0, 1);
|
||||||
|
v_s1 = vld1_lane_u32((const uint32_t *)(s + 2 * p), v_s1, 0);
|
||||||
|
v_s1 = vld1_lane_u32((const uint32_t *)(s + 3 * p), v_s1, 1);
|
||||||
|
v_sum = vaddl_u8(vreinterpret_u8_u32(v_s0), vreinterpret_u8_u32(v_s1));
|
||||||
|
return (horizontal_add_u16x8(v_sum) + 8) >> 4;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
|
unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
|
||||||
uint8x8_t v_s0 = vld1_u8(s);
|
uint8x8_t v_s0 = vld1_u8(s);
|
||||||
const uint8x8_t v_s1 = vld1_u8(s + p);
|
const uint8x8_t v_s1 = vld1_u8(s + p);
|
||||||
|
Loading…
Reference in New Issue
Block a user