Merge "Add 64x variance Neon functions"
This commit is contained in:
commit
bd3dbc588c
@ -1914,11 +1914,17 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
const vp9_variance_fn_t variance8x8_neon = vp9_variance8x8_neon;
|
const vp9_variance_fn_t variance8x8_neon = vp9_variance8x8_neon;
|
||||||
const vp9_variance_fn_t variance16x16_neon = vp9_variance16x16_neon;
|
const vp9_variance_fn_t variance16x16_neon = vp9_variance16x16_neon;
|
||||||
const vp9_variance_fn_t variance32x32_neon = vp9_variance32x32_neon;
|
const vp9_variance_fn_t variance32x32_neon = vp9_variance32x32_neon;
|
||||||
|
const vp9_variance_fn_t variance32x64_neon = vp9_variance32x64_neon;
|
||||||
|
const vp9_variance_fn_t variance64x32_neon = vp9_variance64x32_neon;
|
||||||
|
const vp9_variance_fn_t variance64x64_neon = vp9_variance64x64_neon;
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
NEON, VP9VarianceTest,
|
NEON, VP9VarianceTest,
|
||||||
::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
|
::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
|
||||||
make_tuple(4, 4, variance16x16_neon, 0),
|
make_tuple(4, 4, variance16x16_neon, 0),
|
||||||
make_tuple(5, 5, variance32x32_neon, 0)));
|
make_tuple(5, 5, variance32x32_neon, 0),
|
||||||
|
make_tuple(5, 6, variance32x64_neon, 0),
|
||||||
|
make_tuple(6, 5, variance64x32_neon, 0),
|
||||||
|
make_tuple(6, 6, variance64x64_neon, 0)));
|
||||||
|
|
||||||
const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
|
const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
|
||||||
vp9_sub_pixel_variance8x8_neon;
|
vp9_sub_pixel_variance8x8_neon;
|
||||||
|
@ -798,16 +798,16 @@ add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int sourc
|
|||||||
specialize qw/vp9_variance16x32/, "$sse2_x86inc";
|
specialize qw/vp9_variance16x32/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
||||||
specialize qw/vp9_variance64x32 avx2/, "$sse2_x86inc";
|
specialize qw/vp9_variance64x32 avx2 neon/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
||||||
specialize qw/vp9_variance32x64/, "$sse2_x86inc";
|
specialize qw/vp9_variance32x64 neon/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
||||||
specialize qw/vp9_variance32x32 avx2 neon/, "$sse2_x86inc";
|
specialize qw/vp9_variance32x32 avx2 neon/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
||||||
specialize qw/vp9_variance64x64 avx2/, "$sse2_x86inc";
|
specialize qw/vp9_variance64x64 avx2 neon/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
|
||||||
specialize qw/vp9_variance16x16 avx2 neon/, "$sse2_x86inc";
|
specialize qw/vp9_variance16x16 avx2 neon/, "$sse2_x86inc";
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#include "./vp9_rtcd.h"
|
#include "./vp9_rtcd.h"
|
||||||
|
#include "./vpx_config.h"
|
||||||
|
|
||||||
#include "vpx_ports/mem.h"
|
#include "vpx_ports/mem.h"
|
||||||
#include "vpx/vpx_integer.h"
|
#include "vpx/vpx_integer.h"
|
||||||
@ -28,6 +29,8 @@ enum { kHeight16PlusOne = 17 };
|
|||||||
enum { kWidth32 = 32 };
|
enum { kWidth32 = 32 };
|
||||||
enum { kHeight32 = 32 };
|
enum { kHeight32 = 32 };
|
||||||
enum { kHeight32PlusOne = 33 };
|
enum { kHeight32PlusOne = 33 };
|
||||||
|
enum { kWidth64 = 64 };
|
||||||
|
enum { kHeight64 = 64 };
|
||||||
enum { kPixelStepOne = 1 };
|
enum { kPixelStepOne = 1 };
|
||||||
enum { kAlign16 = 16 };
|
enum { kAlign16 = 16 };
|
||||||
|
|
||||||
@ -208,6 +211,30 @@ unsigned int vp9_variance32x32_neon(const uint8_t *a, int a_stride,
|
|||||||
return *sse - (((int64_t)sum * sum) / (kWidth32 * kHeight32));
|
return *sse - (((int64_t)sum * sum) / (kWidth32 * kHeight32));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Variance entry point for the kWidth32 x kHeight64 block size. Delegates the
// accumulation to variance_neon_w8 and returns *sse - sum^2 / (32 * 64), the
// same formula the 32x32 variant uses, with the division written as a shift.
// NOTE(review): variance_neon_w8 is not visible here; presumably it stores the
// sum of squared differences in *sse and the signed sum of differences in sum.
// Confirm its internal accumulators are wide enough for this many rows.
unsigned int vp9_variance32x64_neon(const uint8_t *a, int a_stride,
|
||||||
|
const uint8_t *b, int b_stride,
|
||||||
|
unsigned int *sse) {
|
||||||
|
int sum;
|
||||||
|
variance_neon_w8(a, a_stride, b, b_stride, kWidth32, kHeight64, sse, &sum);
|
||||||
|
return *sse - (((int64_t)sum * sum) >> 11); // >> 11 == / (32 * 64): kWidth32 * kHeight64 = 2048 = 2^11
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variance entry point for the kWidth64 x kHeight32 block size. Delegates the
// accumulation to variance_neon_w8 and returns *sse - sum^2 / (64 * 32), the
// same formula the 32x32 variant uses, with the division written as a shift.
// NOTE(review): variance_neon_w8 is not visible here; presumably it stores the
// sum of squared differences in *sse and the signed sum of differences in sum.
// Confirm its internal accumulators are wide enough for this many pixels.
unsigned int vp9_variance64x32_neon(const uint8_t *a, int a_stride,
|
||||||
|
const uint8_t *b, int b_stride,
|
||||||
|
unsigned int *sse) {
|
||||||
|
int sum;
|
||||||
|
variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight32, sse, &sum);
|
||||||
|
return *sse - (((int64_t)sum * sum) >> 11); // >> 11 == / (64 * 32): kWidth64 * kHeight32 = 2048 = 2^11
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variance entry point for the kWidth64 x kHeight64 block size. Delegates the
// accumulation to variance_neon_w8 and returns *sse - sum^2 / (64 * 64), the
// same formula the 32x32 variant uses, with the division written as a shift.
// NOTE(review): variance_neon_w8 is not visible here; presumably it stores the
// sum of squared differences in *sse and the signed sum of differences in sum.
// Confirm its internal accumulators are wide enough for a 64x64 block.
unsigned int vp9_variance64x64_neon(const uint8_t *a, int a_stride,
|
||||||
|
const uint8_t *b, int b_stride,
|
||||||
|
unsigned int *sse) {
|
||||||
|
int sum;
|
||||||
|
variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight64, sse, &sum);
|
||||||
|
return *sse - (((int64_t)sum * sum) >> 12); // >> 12 == / (64 * 64): kWidth64 * kHeight64 = 4096 = 2^12
|
||||||
|
}
|
||||||
|
|
||||||
unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
|
unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
|
||||||
int src_stride,
|
int src_stride,
|
||||||
int xoffset,
|
int xoffset,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user