Merge "Make vp9 subpixel match vp8"
This commit is contained in:
commit
c005792951
@ -21,6 +21,9 @@
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#if CONFIG_VP8_ENCODER
|
||||
# include "./vp8_rtcd.h"
|
||||
#endif // CONFIG_VP8_ENCODER
|
||||
#if CONFIG_VP9_ENCODER
|
||||
# include "./vp9_rtcd.h"
|
||||
# include "vp9/encoder/vp9_variance.h"
|
||||
@ -32,10 +35,13 @@ namespace {
|
||||
// Pointer type for the full-pixel variance/MSE kernels bound later in this
// file (e.g. the vpx_highbd_*_mse* and *_variance* functions): two pixel
// buffers with their strides, plus an out-parameter `sse`. Returns unsigned int.
typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
|
||||
const uint8_t *b, int b_stride,
|
||||
unsigned int *sse);
|
||||
// Pointer type for the sub-pixel variance kernels. Same shape as
// VarianceMxNFunc with an (xoffset, yoffset) pair inserted after the first
// buffer's stride. The tests in this file call it with ref_ as `a` and src_
// as `b`, and compare both the return value and *sse against a reference.
typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
|
||||
int xoffset, int yoffset,
|
||||
const uint8_t *b, int b_stride,
|
||||
unsigned int *sse);
|
||||
// Pointer type for a kernel taking two strided buffers and returning
// unsigned int, with no separate `sse` out-parameter.
// NOTE(review): the name suggests a 4x4 SSE getter; no use is visible in this
// excerpt, so confirm against the rest of the file.
typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
|
||||
const uint8_t *b, int b_stride);
|
||||
|
||||
|
||||
using ::std::tr1::get;
|
||||
using ::std::tr1::make_tuple;
|
||||
using ::std::tr1::tuple;
|
||||
@ -102,6 +108,12 @@ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref,
|
||||
(l2w + l2h)));
|
||||
}
|
||||
|
||||
/* The subpel reference functions differ from the codec version in one respect:
|
||||
* they compute the bilinear factors directly instead of using a lookup table,
|
||||
* and therefore shift xoff and yoff left by 1. Only every other computed value
|
||||
* is used, so the codec version shrinks the table to save space and maintain
|
||||
* compatibility with vp8.
|
||||
*/
|
||||
static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
|
||||
int l2w, int l2h, int xoff, int yoff,
|
||||
uint32_t *sse_ptr,
|
||||
@ -111,6 +123,10 @@ static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
|
||||
uint64_t sse = 0;
|
||||
const int w = 1 << l2w;
|
||||
const int h = 1 << l2h;
|
||||
|
||||
xoff <<= 1;
|
||||
yoff <<= 1;
|
||||
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
// Bilinear interpolation at a 16th pel step.
|
||||
@ -480,6 +496,10 @@ static uint32_t subpel_avg_variance_ref(const uint8_t *ref,
|
||||
uint64_t sse = 0;
|
||||
const int w = 1 << l2w;
|
||||
const int h = 1 << l2h;
|
||||
|
||||
xoff <<= 1;
|
||||
yoff <<= 1;
|
||||
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
// Bilinear interpolation at a 16th pel step.
|
||||
@ -598,8 +618,8 @@ class SubpelVarianceTest
|
||||
|
||||
template<typename SubpelVarianceFunctionType>
|
||||
void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
|
||||
for (int x = 0; x < 16; ++x) {
|
||||
for (int y = 0; y < 16; ++y) {
|
||||
for (int x = 0; x < 8; ++x) {
|
||||
for (int y = 0; y < 8; ++y) {
|
||||
if (!use_high_bit_depth_) {
|
||||
for (int j = 0; j < block_size_; j++) {
|
||||
src_[j] = rnd_.Rand8();
|
||||
@ -621,8 +641,9 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
|
||||
unsigned int var1;
|
||||
ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
|
||||
src_, width_, &sse1));
|
||||
const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
|
||||
log2height_, x, y, &sse2,
|
||||
const unsigned int var2 = subpel_variance_ref(ref_, src_,
|
||||
log2width_, log2height_,
|
||||
x, y, &sse2,
|
||||
use_high_bit_depth_,
|
||||
bit_depth_);
|
||||
EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
|
||||
@ -636,8 +657,8 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
|
||||
// Compare against reference.
|
||||
// Src: Set the first half of values to 0, the second half to the maximum.
|
||||
// Ref: Set the first half of values to the maximum, the second half to 0.
|
||||
for (int x = 0; x < 16; ++x) {
|
||||
for (int y = 0; y < 16; ++y) {
|
||||
for (int x = 0; x < 8; ++x) {
|
||||
for (int y = 0; y < 8; ++y) {
|
||||
const int half = block_size_ / 2;
|
||||
if (!use_high_bit_depth_) {
|
||||
memset(src_, 0, half);
|
||||
@ -658,10 +679,10 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
|
||||
ASM_REGISTER_STATE_CHECK(
|
||||
var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
|
||||
const unsigned int var2 =
|
||||
subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
|
||||
use_high_bit_depth_, bit_depth_);
|
||||
EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
|
||||
EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
|
||||
subpel_variance_ref(ref_, src_, log2width_, log2height_,
|
||||
x, y, &sse2, use_high_bit_depth_, bit_depth_);
|
||||
EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
|
||||
EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -669,8 +690,8 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
|
||||
#if CONFIG_VP9_ENCODER
|
||||
template<>
|
||||
void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
|
||||
for (int x = 0; x < 16; ++x) {
|
||||
for (int y = 0; y < 16; ++y) {
|
||||
for (int x = 0; x < 8; ++x) {
|
||||
for (int y = 0; y < 8; ++y) {
|
||||
if (!use_high_bit_depth_) {
|
||||
for (int j = 0; j < block_size_; j++) {
|
||||
src_[j] = rnd_.Rand8();
|
||||
@ -795,7 +816,6 @@ const VarianceMxNFunc highbd_8_mse16x16_c = vpx_highbd_8_mse16x16_c;
|
||||
const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c;
|
||||
const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c;
|
||||
const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c),
|
||||
make_tuple(4, 4, highbd_12_mse16x8_c),
|
||||
@ -811,7 +831,6 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(4, 4, highbd_8_mse8x8_c)));
|
||||
*/
|
||||
|
||||
|
||||
const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c;
|
||||
const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c;
|
||||
const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c;
|
||||
@ -976,7 +995,6 @@ const VarianceMxNFunc highbd_8_mse16x16_sse2 = vpx_highbd_8_mse16x16_sse2;
|
||||
const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2;
|
||||
const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2;
|
||||
const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2),
|
||||
make_tuple(4, 3, highbd_12_mse16x8_sse2),
|
||||
@ -1088,8 +1106,15 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
// VP8 sub-pixel variance tests. They reuse the generic SubpelVarianceTest
// fixture, parameterized on the plain SubpixVarMxNFunc pointer type.
#if CONFIG_VP8
|
||||
typedef SubpelVarianceTest<SubpixVarMxNFunc> VP8SubpelVarianceTest;
|
||||
|
||||
// Ref: sweeps the x/y sub-pixel offsets and compares the kernel under test
// with subpel_variance_ref().
TEST_P(VP8SubpelVarianceTest, Ref) { RefTest(); }
|
||||
// ExtremeRef: same comparison, with each buffer filled half with zeros and
// half with the maximum value (opposite halves for src and ref).
TEST_P(VP8SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
|
||||
#endif // CONFIG_VP8
|
||||
|
||||
#if CONFIG_VP9_ENCODER
|
||||
typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceTest;
|
||||
typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceTest;
|
||||
typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
|
||||
|
||||
TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
|
||||
@ -1097,7 +1122,7 @@ TEST_P(VP9SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
|
||||
TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceHighTest;
|
||||
typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceHighTest;
|
||||
typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t>
|
||||
VP9SubpelAvgVarianceHighTest;
|
||||
|
||||
@ -1106,32 +1131,19 @@ TEST_P(VP9SubpelVarianceHighTest, ExtremeRef) { ExtremeRefTest(); }
|
||||
TEST_P(VP9SubpelAvgVarianceHighTest, Ref) { RefTest(); }
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
const vp9_subpixvariance_fn_t subpel_variance4x4_c =
|
||||
vp9_sub_pixel_variance4x4_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance4x8_c =
|
||||
vp9_sub_pixel_variance4x8_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x4_c =
|
||||
vp9_sub_pixel_variance8x4_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x8_c =
|
||||
vp9_sub_pixel_variance8x8_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x16_c =
|
||||
vp9_sub_pixel_variance8x16_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x8_c =
|
||||
vp9_sub_pixel_variance16x8_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x16_c =
|
||||
vp9_sub_pixel_variance16x16_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x32_c =
|
||||
vp9_sub_pixel_variance16x32_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x16_c =
|
||||
vp9_sub_pixel_variance32x16_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x32_c =
|
||||
vp9_sub_pixel_variance32x32_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x64_c =
|
||||
vp9_sub_pixel_variance32x64_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance64x32_c =
|
||||
vp9_sub_pixel_variance64x32_c;
|
||||
const vp9_subpixvariance_fn_t subpel_variance64x64_c =
|
||||
vp9_sub_pixel_variance64x64_c;
|
||||
const SubpixVarMxNFunc subpel_variance4x4_c = vp9_sub_pixel_variance4x4_c;
|
||||
const SubpixVarMxNFunc subpel_variance4x8_c = vp9_sub_pixel_variance4x8_c;
|
||||
const SubpixVarMxNFunc subpel_variance8x4_c = vp9_sub_pixel_variance8x4_c;
|
||||
const SubpixVarMxNFunc subpel_variance8x8_c = vp9_sub_pixel_variance8x8_c;
|
||||
const SubpixVarMxNFunc subpel_variance8x16_c = vp9_sub_pixel_variance8x16_c;
|
||||
const SubpixVarMxNFunc subpel_variance16x8_c = vp9_sub_pixel_variance16x8_c;
|
||||
const SubpixVarMxNFunc subpel_variance16x16_c = vp9_sub_pixel_variance16x16_c;
|
||||
const SubpixVarMxNFunc subpel_variance16x32_c = vp9_sub_pixel_variance16x32_c;
|
||||
const SubpixVarMxNFunc subpel_variance32x16_c = vp9_sub_pixel_variance32x16_c;
|
||||
const SubpixVarMxNFunc subpel_variance32x32_c = vp9_sub_pixel_variance32x32_c;
|
||||
const SubpixVarMxNFunc subpel_variance32x64_c = vp9_sub_pixel_variance32x64_c;
|
||||
const SubpixVarMxNFunc subpel_variance64x32_c = vp9_sub_pixel_variance64x32_c;
|
||||
const SubpixVarMxNFunc subpel_variance64x64_c = vp9_sub_pixel_variance64x64_c;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, VP9SubpelVarianceTest,
|
||||
::testing::Values(make_tuple(2, 2, subpel_variance4x4_c, 0),
|
||||
@ -1147,6 +1159,23 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(5, 6, subpel_variance32x64_c, 0),
|
||||
make_tuple(6, 5, subpel_variance64x32_c, 0),
|
||||
make_tuple(6, 6, subpel_variance64x64_c, 0)));
|
||||
|
||||
// C (reference) VP8 sub-pixel variance kernels, run through
// VP8SubpelVarianceTest.
// NOTE(review): in this excerpt the block sits between `#if CONFIG_VP9_ENCODER`
// and its `#endif`, so the VP8 C kernels would only be instantiated when the
// VP9 encoder is also enabled. Confirm against the full file.
// NOTE(review): the guard here is CONFIG_VP8, while the "./vp8_rtcd.h" include
// at the top of the file is guarded by CONFIG_VP8_ENCODER. Confirm the two
// agree for decoder-only configurations.
#if CONFIG_VP8
|
||||
// File-local aliases, so each kernel can be passed to make_tuple() as a
// SubpixVarMxNFunc value.
const SubpixVarMxNFunc vp8_subpel_variance16x16_c =
|
||||
vp8_sub_pixel_variance16x16_c;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance16x8_c = vp8_sub_pixel_variance16x8_c;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance8x16_c = vp8_sub_pixel_variance8x16_c;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance8x8_c = vp8_sub_pixel_variance8x8_c;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance4x4_c = vp8_sub_pixel_variance4x4_c;
|
||||
// The first two tuple fields track the block size as log2(width) and
// log2(height), e.g. (4, 3) for 16x8 and (3, 4) for 8x16.
// NOTE(review): the trailing 0 is presumably the bit-depth selector (0 = not
// high bit depth). The fixture that unpacks the tuple is outside this view.
INSTANTIATE_TEST_CASE_P(
|
||||
C, VP8SubpelVarianceTest,
|
||||
::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_c, 0),
|
||||
make_tuple(3, 3, vp8_subpel_variance8x8_c, 0),
|
||||
make_tuple(3, 4, vp8_subpel_variance8x16_c, 0),
|
||||
make_tuple(4, 3, vp8_subpel_variance16x8_c, 0),
|
||||
make_tuple(4, 4, vp8_subpel_variance16x16_c, 0)));
|
||||
#endif // CONFIG_VP8
|
||||
|
||||
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
|
||||
vp9_sub_pixel_avg_variance4x4_c;
|
||||
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
|
||||
@ -1189,83 +1218,83 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(6, 5, subpel_avg_variance64x32_c, 0),
|
||||
make_tuple(6, 6, subpel_avg_variance64x64_c, 0)));
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x4_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance4x4_c =
|
||||
vp9_highbd_10_sub_pixel_variance4x4_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x8_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance4x8_c =
|
||||
vp9_highbd_10_sub_pixel_variance4x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance8x4_c =
|
||||
vp9_highbd_10_sub_pixel_variance8x4_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance8x8_c =
|
||||
vp9_highbd_10_sub_pixel_variance8x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance8x16_c =
|
||||
vp9_highbd_10_sub_pixel_variance8x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance16x8_c =
|
||||
vp9_highbd_10_sub_pixel_variance16x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance16x16_c =
|
||||
vp9_highbd_10_sub_pixel_variance16x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance16x32_c =
|
||||
vp9_highbd_10_sub_pixel_variance16x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance32x16_c =
|
||||
vp9_highbd_10_sub_pixel_variance32x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance32x32_c =
|
||||
vp9_highbd_10_sub_pixel_variance32x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance32x64_c =
|
||||
vp9_highbd_10_sub_pixel_variance32x64_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance64x32_c =
|
||||
vp9_highbd_10_sub_pixel_variance64x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_c =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance64x64_c =
|
||||
vp9_highbd_10_sub_pixel_variance64x64_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x4_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance4x4_c =
|
||||
vp9_highbd_12_sub_pixel_variance4x4_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x8_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance4x8_c =
|
||||
vp9_highbd_12_sub_pixel_variance4x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance8x4_c =
|
||||
vp9_highbd_12_sub_pixel_variance8x4_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance8x8_c =
|
||||
vp9_highbd_12_sub_pixel_variance8x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance8x16_c =
|
||||
vp9_highbd_12_sub_pixel_variance8x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance16x8_c =
|
||||
vp9_highbd_12_sub_pixel_variance16x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance16x16_c =
|
||||
vp9_highbd_12_sub_pixel_variance16x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance16x32_c =
|
||||
vp9_highbd_12_sub_pixel_variance16x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance32x16_c =
|
||||
vp9_highbd_12_sub_pixel_variance32x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance32x32_c =
|
||||
vp9_highbd_12_sub_pixel_variance32x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance32x64_c =
|
||||
vp9_highbd_12_sub_pixel_variance32x64_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance64x32_c =
|
||||
vp9_highbd_12_sub_pixel_variance64x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_c =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance64x64_c =
|
||||
vp9_highbd_12_sub_pixel_variance64x64_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance4x4_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance4x4_c =
|
||||
vp9_highbd_sub_pixel_variance4x4_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance4x8_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance4x8_c =
|
||||
vp9_highbd_sub_pixel_variance4x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance8x4_c =
|
||||
vp9_highbd_sub_pixel_variance8x4_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance8x8_c =
|
||||
vp9_highbd_sub_pixel_variance8x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance8x16_c =
|
||||
vp9_highbd_sub_pixel_variance8x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance16x8_c =
|
||||
vp9_highbd_sub_pixel_variance16x8_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance16x16_c =
|
||||
vp9_highbd_sub_pixel_variance16x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance16x32_c =
|
||||
vp9_highbd_sub_pixel_variance16x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance32x16_c =
|
||||
vp9_highbd_sub_pixel_variance32x16_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance32x32_c =
|
||||
vp9_highbd_sub_pixel_variance32x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance32x64_c =
|
||||
vp9_highbd_sub_pixel_variance32x64_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance64x32_c =
|
||||
vp9_highbd_sub_pixel_variance64x32_c;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance64x64_c =
|
||||
vp9_highbd_sub_pixel_variance64x64_c;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, VP9SubpelVarianceHighTest,
|
||||
@ -1431,34 +1460,48 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // CONFIG_VP9_ENCODER
|
||||
|
||||
// MMX VP8 sub-pixel variance kernels, run through VP8SubpelVarianceTest.
// Compiled only when both CONFIG_VP8 and HAVE_MMX are set.
#if CONFIG_VP8
|
||||
#if HAVE_MMX
|
||||
// NOTE(review): these aliases omit the `vp8_` prefix used by the C, SSE2 and
// SSSE3 VP8 aliases in this file (vp8_subpel_variance*_c/_sse2/_ssse3).
// Consider renaming them for consistency.
const SubpixVarMxNFunc subpel_variance16x16_mmx =
|
||||
vp8_sub_pixel_variance16x16_mmx;
|
||||
const SubpixVarMxNFunc subpel_variance16x8_mmx = vp8_sub_pixel_variance16x8_mmx;
|
||||
const SubpixVarMxNFunc subpel_variance8x16_mmx = vp8_sub_pixel_variance8x16_mmx;
|
||||
const SubpixVarMxNFunc subpel_variance8x8_mmx = vp8_sub_pixel_variance8x8_mmx;
|
||||
const SubpixVarMxNFunc subpel_variance4x4_mmx = vp8_sub_pixel_variance4x4_mmx;
|
||||
// The first two tuple fields track the block size as log2(width) and
// log2(height), e.g. (4, 3) for 16x8. The trailing 0 is presumably the
// bit-depth selector (TODO confirm against the fixture).
INSTANTIATE_TEST_CASE_P(
|
||||
MMX, VP8SubpelVarianceTest,
|
||||
::testing::Values(make_tuple(4, 4, subpel_variance16x16_mmx, 0),
|
||||
make_tuple(4, 3, subpel_variance16x8_mmx, 0),
|
||||
make_tuple(3, 4, subpel_variance8x16_mmx, 0),
|
||||
make_tuple(3, 3, subpel_variance8x8_mmx, 0),
|
||||
make_tuple(2, 2, subpel_variance4x4_mmx, 0)));
|
||||
#endif // HAVE_MMX
|
||||
#endif // CONFIG_VP8
|
||||
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#if HAVE_SSE2
|
||||
#if CONFIG_USE_X86INC
|
||||
const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
|
||||
vp9_sub_pixel_variance4x4_sse;
|
||||
const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
|
||||
vp9_sub_pixel_variance4x8_sse;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x4_sse2 =
|
||||
vp9_sub_pixel_variance8x4_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x8_sse2 =
|
||||
vp9_sub_pixel_variance8x8_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x16_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance4x4_sse = vp9_sub_pixel_variance4x4_sse;
|
||||
const SubpixVarMxNFunc subpel_variance4x8_sse = vp9_sub_pixel_variance4x8_sse;
|
||||
const SubpixVarMxNFunc subpel_variance8x4_sse2 = vp9_sub_pixel_variance8x4_sse2;
|
||||
const SubpixVarMxNFunc subpel_variance8x8_sse2 = vp9_sub_pixel_variance8x8_sse2;
|
||||
const SubpixVarMxNFunc subpel_variance8x16_sse2 =
|
||||
vp9_sub_pixel_variance8x16_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x8_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance16x8_sse2 =
|
||||
vp9_sub_pixel_variance16x8_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x16_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance16x16_sse2 =
|
||||
vp9_sub_pixel_variance16x16_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x32_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance16x32_sse2 =
|
||||
vp9_sub_pixel_variance16x32_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x16_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance32x16_sse2 =
|
||||
vp9_sub_pixel_variance32x16_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x32_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance32x32_sse2 =
|
||||
vp9_sub_pixel_variance32x32_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x64_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance32x64_sse2 =
|
||||
vp9_sub_pixel_variance32x64_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance64x32_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance64x32_sse2 =
|
||||
vp9_sub_pixel_variance64x32_sse2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 =
|
||||
const SubpixVarMxNFunc subpel_variance64x64_sse2 =
|
||||
vp9_sub_pixel_variance64x64_sse2;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, VP9SubpelVarianceTest,
|
||||
@ -1517,71 +1560,71 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
|
||||
make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0)));
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance8x4_sse2 =
|
||||
vp9_highbd_sub_pixel_variance8x4_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance8x8_sse2 =
|
||||
vp9_highbd_sub_pixel_variance8x8_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance8x16_sse2 =
|
||||
vp9_highbd_sub_pixel_variance8x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance16x8_sse2 =
|
||||
vp9_highbd_sub_pixel_variance16x8_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance16x16_sse2 =
|
||||
vp9_highbd_sub_pixel_variance16x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance16x32_sse2 =
|
||||
vp9_highbd_sub_pixel_variance16x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance32x16_sse2 =
|
||||
vp9_highbd_sub_pixel_variance32x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance32x32_sse2 =
|
||||
vp9_highbd_sub_pixel_variance32x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance32x64_sse2 =
|
||||
vp9_highbd_sub_pixel_variance32x64_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance64x32_sse2 =
|
||||
vp9_highbd_sub_pixel_variance64x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_sse2 =
|
||||
const SubpixVarMxNFunc highbd_subpel_variance64x64_sse2 =
|
||||
vp9_highbd_sub_pixel_variance64x64_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance8x4_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance8x8_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance8x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance16x8_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance16x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance16x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance32x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance32x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance32x64_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance64x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_sse2 =
|
||||
const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 =
|
||||
vp9_highbd_10_sub_pixel_variance64x64_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance8x4_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance8x8_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance8x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance16x8_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance16x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance16x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance32x16_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance32x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance32x64_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance64x32_sse2;
|
||||
const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_sse2 =
|
||||
const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 =
|
||||
vp9_highbd_12_sub_pixel_variance64x64_sse2;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, VP9SubpelVarianceHighTest,
|
||||
@ -1725,35 +1768,56 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif // HAVE_SSE2
|
||||
#endif // CONFIG_VP9_ENCODER
|
||||
|
||||
// SSE2 VP8 sub-pixel variance kernels, run through VP8SubpelVarianceTest.
// Compiled only when both CONFIG_VP8 and HAVE_SSE2 are set.
#if CONFIG_VP8
|
||||
#if HAVE_SSE2
|
||||
// The aliases carry an `_sse2` suffix, but the functions they bind to are the
// ones named with a `_wmt` suffix.
const SubpixVarMxNFunc vp8_subpel_variance16x16_sse2 =
|
||||
vp8_sub_pixel_variance16x16_wmt;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance16x8_sse2 =
|
||||
vp8_sub_pixel_variance16x8_wmt;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance8x16_sse2 =
|
||||
vp8_sub_pixel_variance8x16_wmt;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance8x8_sse2 =
|
||||
vp8_sub_pixel_variance8x8_wmt;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance4x4_sse2 =
|
||||
vp8_sub_pixel_variance4x4_wmt;
|
||||
// The first two tuple fields track the block size as log2(width) and
// log2(height), e.g. (3, 4) for 8x16. The trailing 0 is presumably the
// bit-depth selector (TODO confirm against the fixture).
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, VP8SubpelVarianceTest,
|
||||
::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_sse2, 0),
|
||||
make_tuple(3, 3, vp8_subpel_variance8x8_sse2, 0),
|
||||
make_tuple(3, 4, vp8_subpel_variance8x16_sse2, 0),
|
||||
make_tuple(4, 3, vp8_subpel_variance16x8_sse2, 0),
|
||||
make_tuple(4, 4, vp8_subpel_variance16x16_sse2, 0)));
|
||||
#endif // HAVE_SSE2
|
||||
#endif // CONFIG_VP8
|
||||
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#if HAVE_SSSE3
|
||||
#if CONFIG_USE_X86INC
|
||||
|
||||
const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance4x4_ssse3 =
|
||||
vp9_sub_pixel_variance4x4_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance4x8_ssse3 =
|
||||
vp9_sub_pixel_variance4x8_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x4_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance8x4_ssse3 =
|
||||
vp9_sub_pixel_variance8x4_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x8_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance8x8_ssse3 =
|
||||
vp9_sub_pixel_variance8x8_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x16_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance8x16_ssse3 =
|
||||
vp9_sub_pixel_variance8x16_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x8_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance16x8_ssse3 =
|
||||
vp9_sub_pixel_variance16x8_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x16_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance16x16_ssse3 =
|
||||
vp9_sub_pixel_variance16x16_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x32_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance16x32_ssse3 =
|
||||
vp9_sub_pixel_variance16x32_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x16_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance32x16_ssse3 =
|
||||
vp9_sub_pixel_variance32x16_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x32_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance32x32_ssse3 =
|
||||
vp9_sub_pixel_variance32x32_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x64_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance32x64_ssse3 =
|
||||
vp9_sub_pixel_variance32x64_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance64x32_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance64x32_ssse3 =
|
||||
vp9_sub_pixel_variance64x32_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 =
|
||||
const SubpixVarMxNFunc subpel_variance64x64_ssse3 =
|
||||
vp9_sub_pixel_variance64x64_ssse3;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, VP9SubpelVarianceTest,
|
||||
@ -1815,6 +1879,19 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif // HAVE_SSSE3
|
||||
#endif // CONFIG_VP9_ENCODER
|
||||
|
||||
#if CONFIG_VP8
|
||||
#if HAVE_SSSE3
|
||||
const SubpixVarMxNFunc vp8_subpel_variance16x16_ssse3 =
|
||||
vp8_sub_pixel_variance16x16_ssse3;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance16x8_ssse3 =
|
||||
vp8_sub_pixel_variance16x8_ssse3;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, VP8SubpelVarianceTest,
|
||||
::testing::Values(make_tuple(4, 3, vp8_subpel_variance16x8_ssse3, 0),
|
||||
make_tuple(4, 4, vp8_subpel_variance16x16_ssse3, 0)));
|
||||
#endif // HAVE_SSSE3
|
||||
#endif // CONFIG_VP8
|
||||
|
||||
#if HAVE_AVX2
|
||||
const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2;
|
||||
INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest,
|
||||
@ -1834,9 +1911,9 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(4, 4, variance16x16_avx2, 0)));
|
||||
|
||||
#if CONFIG_VP9_ENCODER
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
|
||||
const SubpixVarMxNFunc subpel_variance32x32_avx2 =
|
||||
vp9_sub_pixel_variance32x32_avx2;
|
||||
const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
|
||||
const SubpixVarMxNFunc subpel_variance64x64_avx2 =
|
||||
vp9_sub_pixel_variance64x64_avx2;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
AVX2, VP9SubpelVarianceTest,
|
||||
@ -1854,6 +1931,19 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif // CONFIG_VP9_ENCODER
|
||||
#endif // HAVE_AVX2
|
||||
|
||||
#if CONFIG_VP8
|
||||
#if HAVE_MEDIA
|
||||
const SubpixVarMxNFunc subpel_variance16x16_media =
|
||||
vp8_sub_pixel_variance16x16_armv6;
|
||||
const SubpixVarMxNFunc subpel_variance8x8_media =
|
||||
vp8_sub_pixel_variance8x8_armv6;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MEDIA, VP8SubpelVarianceTest,
|
||||
::testing::Values(make_tuple(3, 3, subpel_variance8x8_media, 0),
|
||||
make_tuple(4, 4, subpel_variance16x16_media, 0)));
|
||||
#endif // HAVE_MEDIA
|
||||
#endif // CONFIG_VP8
|
||||
|
||||
#if HAVE_NEON
|
||||
const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon;
|
||||
INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest,
|
||||
@ -1882,14 +1972,26 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(3, 4, variance8x16_neon, 0),
|
||||
make_tuple(3, 3, variance8x8_neon, 0)));
|
||||
|
||||
#if CONFIG_VP8
|
||||
#if HAVE_NEON_ASM
|
||||
const SubpixVarMxNFunc vp8_subpel_variance16x16_neon =
|
||||
vp8_sub_pixel_variance16x16_neon;
|
||||
const SubpixVarMxNFunc vp8_subpel_variance8x8_neon =
|
||||
vp8_sub_pixel_variance8x8_neon;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, VP8SubpelVarianceTest,
|
||||
::testing::Values(make_tuple(3, 3, vp8_subpel_variance8x8_neon, 0),
|
||||
make_tuple(4, 4, vp8_subpel_variance16x16_neon, 0)));
|
||||
#endif // HAVE_NEON_ASM
|
||||
#endif // CONFIG_VP8
|
||||
|
||||
#if CONFIG_VP9_ENCODER
|
||||
const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
|
||||
vp9_sub_pixel_variance8x8_neon;
|
||||
const vp9_subpixvariance_fn_t subpel_variance16x16_neon =
|
||||
const SubpixVarMxNFunc subpel_variance8x8_neon = vp9_sub_pixel_variance8x8_neon;
|
||||
const SubpixVarMxNFunc subpel_variance16x16_neon =
|
||||
vp9_sub_pixel_variance16x16_neon;
|
||||
const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
|
||||
const SubpixVarMxNFunc subpel_variance32x32_neon =
|
||||
vp9_sub_pixel_variance32x32_neon;
|
||||
const vp9_subpixvariance_fn_t subpel_variance64x64_neon =
|
||||
const SubpixVarMxNFunc subpel_variance64x64_neon =
|
||||
vp9_sub_pixel_variance64x64_neon;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, VP9SubpelVarianceTest,
|
||||
|
@ -43,14 +43,6 @@ typedef int16_t InterpKernel[SUBPEL_TAPS];
|
||||
|
||||
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
|
||||
|
||||
DECLARE_ALIGNED(256, extern const InterpKernel,
|
||||
vp9_bilinear_filters[SUBPEL_SHIFTS]);
|
||||
|
||||
// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
|
||||
// filter kernel as a 2 tap filter.
|
||||
#define BILINEAR_FILTERS_2TAP(x) \
|
||||
(vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@ -16,10 +16,18 @@
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_filter.h"
|
||||
|
||||
#include "vp9/encoder/vp9_variance.h"
|
||||
static uint8_t bilinear_filters[8][2] = {
|
||||
{ 128, 0, },
|
||||
{ 112, 16, },
|
||||
{ 96, 32, },
|
||||
{ 80, 48, },
|
||||
{ 64, 64, },
|
||||
{ 48, 80, },
|
||||
{ 32, 96, },
|
||||
{ 16, 112, },
|
||||
};
|
||||
|
||||
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
|
||||
uint8_t *output_ptr,
|
||||
@ -27,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int16_t *vp9_filter) {
|
||||
const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
|
||||
const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
|
||||
const uint8_t *vp9_filter) {
|
||||
const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
|
||||
const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
|
||||
unsigned int i;
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
|
||||
@ -50,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int16_t *vp9_filter) {
|
||||
const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
|
||||
const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
|
||||
const uint8_t *vp9_filter) {
|
||||
const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
|
||||
const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
|
||||
unsigned int i, j;
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
for (j = 0; j < output_width; j += 16) {
|
||||
@ -84,9 +92,9 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src,
|
||||
|
||||
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
|
||||
9, 8,
|
||||
BILINEAR_FILTERS_2TAP(xoffset));
|
||||
bilinear_filters[xoffset]);
|
||||
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
|
||||
8, BILINEAR_FILTERS_2TAP(yoffset));
|
||||
8, bilinear_filters[yoffset]);
|
||||
return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
|
||||
}
|
||||
|
||||
@ -102,9 +110,9 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
|
||||
|
||||
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
|
||||
17, 16,
|
||||
BILINEAR_FILTERS_2TAP(xoffset));
|
||||
bilinear_filters[xoffset]);
|
||||
var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
|
||||
16, BILINEAR_FILTERS_2TAP(yoffset));
|
||||
16, bilinear_filters[yoffset]);
|
||||
return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
|
||||
}
|
||||
|
||||
@ -120,9 +128,9 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
|
||||
|
||||
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
|
||||
33, 32,
|
||||
BILINEAR_FILTERS_2TAP(xoffset));
|
||||
bilinear_filters[xoffset]);
|
||||
var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
|
||||
32, BILINEAR_FILTERS_2TAP(yoffset));
|
||||
32, bilinear_filters[yoffset]);
|
||||
return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
|
||||
}
|
||||
|
||||
@ -138,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
|
||||
|
||||
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
|
||||
65, 64,
|
||||
BILINEAR_FILTERS_2TAP(xoffset));
|
||||
bilinear_filters[xoffset]);
|
||||
var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
|
||||
64, BILINEAR_FILTERS_2TAP(yoffset));
|
||||
64, bilinear_filters[yoffset]);
|
||||
return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
|
||||
}
|
||||
|
@ -162,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
|
||||
error_per_bit + 4096) >> 13 : 0)
|
||||
|
||||
|
||||
// convert motion vector component to offset for svf calc
|
||||
// convert motion vector component to offset for sv[a]f calc
|
||||
static INLINE int sp(int x) {
|
||||
return (x & 7) << 1;
|
||||
return x & 7;
|
||||
}
|
||||
|
||||
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
|
||||
@ -679,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
|
||||
tc = bc + search_step[idx].col;
|
||||
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
|
||||
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
|
||||
int row_offset = (tr & 0x07) << 1;
|
||||
int col_offset = (tc & 0x07) << 1;
|
||||
MV this_mv;
|
||||
this_mv.row = tr;
|
||||
this_mv.col = tc;
|
||||
if (second_pred == NULL)
|
||||
thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
|
||||
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
|
||||
src_address, src_stride, &sse);
|
||||
else
|
||||
thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
|
||||
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
|
||||
src_address, src_stride, &sse, second_pred);
|
||||
cost_array[idx] = thismse +
|
||||
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
@ -709,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
|
||||
tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
|
||||
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
|
||||
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
|
||||
int row_offset = (tr & 0x07) << 1;
|
||||
int col_offset = (tc & 0x07) << 1;
|
||||
MV this_mv = {tr, tc};
|
||||
if (second_pred == NULL)
|
||||
thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
|
||||
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
|
||||
src_address, src_stride, &sse);
|
||||
else
|
||||
thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
|
||||
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
|
||||
src_address, src_stride, &sse, second_pred);
|
||||
cost_array[4] = thismse +
|
||||
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
|
||||
|
@ -19,6 +19,17 @@
|
||||
|
||||
#include "vp9/encoder/vp9_variance.h"
|
||||
|
||||
static uint8_t bilinear_filters[8][2] = {
|
||||
{ 128, 0, },
|
||||
{ 112, 16, },
|
||||
{ 96, 32, },
|
||||
{ 80, 48, },
|
||||
{ 64, 64, },
|
||||
{ 48, 80, },
|
||||
{ 32, 96, },
|
||||
{ 16, 112, },
|
||||
};
|
||||
|
||||
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
|
||||
// or vertical direction to produce the filtered output block. Used to implement
|
||||
// first-pass of 2-D separable filter.
|
||||
@ -33,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int16_t *vp9_filter) {
|
||||
const uint8_t *vp9_filter) {
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; i++) {
|
||||
@ -65,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int16_t *vp9_filter) {
|
||||
const uint8_t *vp9_filter) {
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; i++) {
|
||||
@ -91,9 +102,9 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
|
||||
uint8_t temp2[H * W]; \
|
||||
\
|
||||
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
|
||||
BILINEAR_FILTERS_2TAP(xoffset)); \
|
||||
bilinear_filters[xoffset]); \
|
||||
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
|
||||
BILINEAR_FILTERS_2TAP(yoffset)); \
|
||||
bilinear_filters[yoffset]); \
|
||||
\
|
||||
return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
|
||||
}
|
||||
@ -110,9 +121,9 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
|
||||
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
|
||||
\
|
||||
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
|
||||
BILINEAR_FILTERS_2TAP(xoffset)); \
|
||||
bilinear_filters[xoffset]); \
|
||||
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
|
||||
BILINEAR_FILTERS_2TAP(yoffset)); \
|
||||
bilinear_filters[yoffset]); \
|
||||
\
|
||||
vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
|
||||
\
|
||||
@ -166,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass(
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int16_t *vp9_filter) {
|
||||
const uint8_t *vp9_filter) {
|
||||
unsigned int i, j;
|
||||
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
|
||||
for (i = 0; i < output_height; i++) {
|
||||
@ -192,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass(
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int16_t *vp9_filter) {
|
||||
const uint8_t *vp9_filter) {
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; i++) {
|
||||
@ -219,9 +230,9 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
|
||||
uint16_t temp2[H * W]; \
|
||||
\
|
||||
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
|
||||
W, BILINEAR_FILTERS_2TAP(xoffset)); \
|
||||
W, bilinear_filters[xoffset]); \
|
||||
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
|
||||
BILINEAR_FILTERS_2TAP(yoffset)); \
|
||||
bilinear_filters[yoffset]); \
|
||||
\
|
||||
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
|
||||
dst_stride, sse); \
|
||||
@ -236,9 +247,9 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
|
||||
uint16_t temp2[H * W]; \
|
||||
\
|
||||
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
|
||||
W, BILINEAR_FILTERS_2TAP(xoffset)); \
|
||||
W, bilinear_filters[xoffset]); \
|
||||
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
|
||||
BILINEAR_FILTERS_2TAP(yoffset)); \
|
||||
bilinear_filters[yoffset]); \
|
||||
\
|
||||
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
|
||||
W, dst, dst_stride, sse); \
|
||||
@ -253,9 +264,9 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
|
||||
uint16_t temp2[H * W]; \
|
||||
\
|
||||
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
|
||||
W, BILINEAR_FILTERS_2TAP(xoffset)); \
|
||||
W, bilinear_filters[xoffset]); \
|
||||
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
|
||||
BILINEAR_FILTERS_2TAP(yoffset)); \
|
||||
bilinear_filters[yoffset]); \
|
||||
\
|
||||
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
|
||||
W, dst, dst_stride, sse); \
|
||||
@ -273,9 +284,9 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
|
||||
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
|
||||
\
|
||||
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
|
||||
W, BILINEAR_FILTERS_2TAP(xoffset)); \
|
||||
W, bilinear_filters[xoffset]); \
|
||||
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
|
||||
BILINEAR_FILTERS_2TAP(yoffset)); \
|
||||
bilinear_filters[yoffset]); \
|
||||
\
|
||||
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
|
||||
CONVERT_TO_BYTEPTR(temp2), W); \
|
||||
@ -295,9 +306,9 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
|
||||
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
|
||||
\
|
||||
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
|
||||
W, BILINEAR_FILTERS_2TAP(xoffset)); \
|
||||
W, bilinear_filters[xoffset]); \
|
||||
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
|
||||
BILINEAR_FILTERS_2TAP(yoffset)); \
|
||||
bilinear_filters[yoffset]); \
|
||||
\
|
||||
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
|
||||
CONVERT_TO_BYTEPTR(temp2), W); \
|
||||
@ -317,9 +328,9 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
|
||||
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
|
||||
\
|
||||
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
|
||||
W, BILINEAR_FILTERS_2TAP(xoffset)); \
|
||||
W, bilinear_filters[xoffset]); \
|
||||
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
|
||||
BILINEAR_FILTERS_2TAP(yoffset)); \
|
||||
bilinear_filters[yoffset]); \
|
||||
\
|
||||
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
|
||||
CONVERT_TO_BYTEPTR(temp2), W); \
|
||||
|
@ -14,35 +14,19 @@ SECTION_RODATA
|
||||
pw_8: times 8 dw 8
|
||||
bilin_filter_m_sse2: times 8 dw 16
|
||||
times 8 dw 0
|
||||
times 8 dw 15
|
||||
times 8 dw 1
|
||||
times 8 dw 14
|
||||
times 8 dw 2
|
||||
times 8 dw 13
|
||||
times 8 dw 3
|
||||
times 8 dw 12
|
||||
times 8 dw 4
|
||||
times 8 dw 11
|
||||
times 8 dw 5
|
||||
times 8 dw 10
|
||||
times 8 dw 6
|
||||
times 8 dw 9
|
||||
times 8 dw 7
|
||||
times 16 dw 8
|
||||
times 8 dw 7
|
||||
times 8 dw 9
|
||||
times 8 dw 6
|
||||
times 8 dw 10
|
||||
times 8 dw 5
|
||||
times 8 dw 11
|
||||
times 8 dw 4
|
||||
times 8 dw 12
|
||||
times 8 dw 3
|
||||
times 8 dw 13
|
||||
times 8 dw 2
|
||||
times 8 dw 14
|
||||
times 8 dw 1
|
||||
times 8 dw 15
|
||||
|
||||
SECTION .text
|
||||
|
||||
|
@ -14,52 +14,28 @@ SECTION_RODATA
|
||||
pw_8: times 8 dw 8
|
||||
bilin_filter_m_sse2: times 8 dw 16
|
||||
times 8 dw 0
|
||||
times 8 dw 15
|
||||
times 8 dw 1
|
||||
times 8 dw 14
|
||||
times 8 dw 2
|
||||
times 8 dw 13
|
||||
times 8 dw 3
|
||||
times 8 dw 12
|
||||
times 8 dw 4
|
||||
times 8 dw 11
|
||||
times 8 dw 5
|
||||
times 8 dw 10
|
||||
times 8 dw 6
|
||||
times 8 dw 9
|
||||
times 8 dw 7
|
||||
times 16 dw 8
|
||||
times 8 dw 7
|
||||
times 8 dw 9
|
||||
times 8 dw 6
|
||||
times 8 dw 10
|
||||
times 8 dw 5
|
||||
times 8 dw 11
|
||||
times 8 dw 4
|
||||
times 8 dw 12
|
||||
times 8 dw 3
|
||||
times 8 dw 13
|
||||
times 8 dw 2
|
||||
times 8 dw 14
|
||||
times 8 dw 1
|
||||
times 8 dw 15
|
||||
|
||||
bilin_filter_m_ssse3: times 8 db 16, 0
|
||||
times 8 db 15, 1
|
||||
times 8 db 14, 2
|
||||
times 8 db 13, 3
|
||||
times 8 db 12, 4
|
||||
times 8 db 11, 5
|
||||
times 8 db 10, 6
|
||||
times 8 db 9, 7
|
||||
times 16 db 8
|
||||
times 8 db 7, 9
|
||||
times 8 db 6, 10
|
||||
times 8 db 5, 11
|
||||
times 8 db 4, 12
|
||||
times 8 db 3, 13
|
||||
times 8 db 2, 14
|
||||
times 8 db 1, 15
|
||||
|
||||
SECTION .text
|
||||
|
||||
|
@ -17,36 +17,20 @@
|
||||
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
|
||||
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
|
||||
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
|
||||
15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
|
||||
15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
|
||||
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
|
||||
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
|
||||
13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
|
||||
13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
|
||||
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
|
||||
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
|
||||
11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
|
||||
11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
|
||||
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
|
||||
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
|
||||
9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
|
||||
9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
|
||||
7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
|
||||
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
|
||||
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
|
||||
5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
|
||||
5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
|
||||
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
|
||||
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
|
||||
3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
|
||||
3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
|
||||
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
|
||||
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
|
||||
1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
|
||||
1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
|
||||
};
|
||||
|
||||
#define FILTER_SRC(filter) \
|
||||
|
Loading…
x
Reference in New Issue
Block a user