Merge remote-tracking branch 'webm/master' into nextgenv2
This commit is contained in:
commit
aa6c754635
@ -37,120 +37,23 @@ const int number_of_iterations = 10000;
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
|
typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
int count, int bd);
|
int bd);
|
||||||
typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
|
typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
|
||||||
const uint8_t *limit0, const uint8_t *thresh0,
|
const uint8_t *limit0, const uint8_t *thresh0,
|
||||||
const uint8_t *blimit1, const uint8_t *limit1,
|
const uint8_t *blimit1, const uint8_t *limit1,
|
||||||
const uint8_t *thresh1, int bd);
|
const uint8_t *thresh1, int bd);
|
||||||
#else
|
#else
|
||||||
typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
|
typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh);
|
||||||
int count);
|
|
||||||
typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
|
typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
|
||||||
const uint8_t *limit0, const uint8_t *thresh0,
|
const uint8_t *limit0, const uint8_t *thresh0,
|
||||||
const uint8_t *blimit1, const uint8_t *limit1,
|
const uint8_t *blimit1, const uint8_t *limit1,
|
||||||
const uint8_t *thresh1);
|
const uint8_t *thresh1);
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t;
|
typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
|
||||||
typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
|
typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
|
||||||
|
|
||||||
#if HAVE_SSE2
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count, int bd) {
|
|
||||||
vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count, int bd) {
|
|
||||||
vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count, int bd) {
|
|
||||||
vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count, int bd) {
|
|
||||||
vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
#endif // HAVE_SSE2
|
|
||||||
|
|
||||||
#if HAVE_NEON_ASM
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
// No neon high bitdepth functions.
|
|
||||||
#else
|
|
||||||
void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
#endif // HAVE_NEON_ASM
|
|
||||||
|
|
||||||
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
|
|
||||||
void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
|
||||||
int count) {
|
|
||||||
vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
|
|
||||||
}
|
|
||||||
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
|
|
||||||
|
|
||||||
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
|
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
|
||||||
public:
|
public:
|
||||||
virtual ~Loop8Test6Param() {}
|
virtual ~Loop8Test6Param() {}
|
||||||
@ -158,7 +61,6 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
|
|||||||
loopfilter_op_ = GET_PARAM(0);
|
loopfilter_op_ = GET_PARAM(0);
|
||||||
ref_loopfilter_op_ = GET_PARAM(1);
|
ref_loopfilter_op_ = GET_PARAM(1);
|
||||||
bit_depth_ = GET_PARAM(2);
|
bit_depth_ = GET_PARAM(2);
|
||||||
count_ = GET_PARAM(3);
|
|
||||||
mask_ = (1 << bit_depth_) - 1;
|
mask_ = (1 << bit_depth_) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -166,7 +68,6 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
int bit_depth_;
|
int bit_depth_;
|
||||||
int count_;
|
|
||||||
int mask_;
|
int mask_;
|
||||||
loop_op_t loopfilter_op_;
|
loop_op_t loopfilter_op_;
|
||||||
loop_op_t ref_loopfilter_op_;
|
loop_op_t ref_loopfilter_op_;
|
||||||
@ -253,13 +154,13 @@ TEST_P(Loop8Test6Param, OperationCheck) {
|
|||||||
ref_s[j] = s[j];
|
ref_s[j] = s[j];
|
||||||
}
|
}
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
|
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
|
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
|
||||||
#else
|
#else
|
||||||
ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
|
ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh);
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
|
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||||
@ -325,13 +226,13 @@ TEST_P(Loop8Test6Param, ValueCheck) {
|
|||||||
ref_s[j] = s[j];
|
ref_s[j] = s[j];
|
||||||
}
|
}
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
|
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
|
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
|
||||||
#else
|
#else
|
||||||
ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
|
ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh);
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
|
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||||
err_count += ref_s[j] != s[j];
|
err_count += ref_s[j] != s[j];
|
||||||
@ -529,70 +430,85 @@ TEST_P(Loop8Test9Param, ValueCheck) {
|
|||||||
|
|
||||||
using std::tr1::make_tuple;
|
using std::tr1::make_tuple;
|
||||||
|
|
||||||
|
#if HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
MMX, Loop8Test6Param,
|
||||||
|
::testing::Values(
|
||||||
|
make_tuple(&vpx_lpf_horizontal_4_mmx,
|
||||||
|
&vpx_lpf_horizontal_4_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_4_mmx,
|
||||||
|
&vpx_lpf_vertical_4_c, 8)));
|
||||||
|
#endif // HAVE_MMX
|
||||||
|
|
||||||
#if HAVE_SSE2
|
#if HAVE_SSE2
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
SSE2, Loop8Test6Param,
|
SSE2, Loop8Test6Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_4_c, 8, 1),
|
&vpx_highbd_lpf_horizontal_4_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
|
||||||
&vpx_highbd_lpf_vertical_4_c, 8, 1),
|
&vpx_highbd_lpf_vertical_4_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_8_c, 8, 1),
|
&vpx_highbd_lpf_horizontal_8_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_16_c, 8, 1),
|
&vpx_highbd_lpf_horizontal_edge_8_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_16_c, 8, 2),
|
&vpx_highbd_lpf_horizontal_edge_16_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
||||||
&vpx_highbd_lpf_vertical_8_c, 8, 1),
|
&vpx_highbd_lpf_vertical_8_c, 8),
|
||||||
make_tuple(&wrapper_vertical_16_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
||||||
&wrapper_vertical_16_c, 8, 1),
|
&vpx_highbd_lpf_vertical_16_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_4_c, 10, 1),
|
&vpx_highbd_lpf_horizontal_4_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
|
||||||
&vpx_highbd_lpf_vertical_4_c, 10, 1),
|
&vpx_highbd_lpf_vertical_4_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_8_c, 10, 1),
|
&vpx_highbd_lpf_horizontal_8_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_16_c, 10, 1),
|
&vpx_highbd_lpf_horizontal_edge_8_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_16_c, 10, 2),
|
&vpx_highbd_lpf_horizontal_edge_16_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
||||||
&vpx_highbd_lpf_vertical_8_c, 10, 1),
|
&vpx_highbd_lpf_vertical_8_c, 10),
|
||||||
make_tuple(&wrapper_vertical_16_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
||||||
&wrapper_vertical_16_c, 10, 1),
|
&vpx_highbd_lpf_vertical_16_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_4_c, 12, 1),
|
&vpx_highbd_lpf_horizontal_4_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
|
||||||
&vpx_highbd_lpf_vertical_4_c, 12, 1),
|
&vpx_highbd_lpf_vertical_4_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_8_c, 12, 1),
|
&vpx_highbd_lpf_horizontal_8_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_16_c, 12, 1),
|
&vpx_highbd_lpf_horizontal_edge_8_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_16_c, 12, 2),
|
&vpx_highbd_lpf_horizontal_edge_16_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
||||||
&vpx_highbd_lpf_vertical_8_c, 12, 1),
|
&vpx_highbd_lpf_vertical_8_c, 12),
|
||||||
make_tuple(&wrapper_vertical_16_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
||||||
&wrapper_vertical_16_c, 12, 1),
|
&vpx_highbd_lpf_vertical_16_c, 12),
|
||||||
make_tuple(&wrapper_vertical_16_dual_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
|
||||||
&wrapper_vertical_16_dual_c, 8, 1),
|
&vpx_highbd_lpf_vertical_16_dual_c, 8),
|
||||||
make_tuple(&wrapper_vertical_16_dual_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
|
||||||
&wrapper_vertical_16_dual_c, 10, 1),
|
&vpx_highbd_lpf_vertical_16_dual_c, 10),
|
||||||
make_tuple(&wrapper_vertical_16_dual_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
|
||||||
&wrapper_vertical_16_dual_c, 12, 1)));
|
&vpx_highbd_lpf_vertical_16_dual_c, 12)));
|
||||||
#else
|
#else
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
SSE2, Loop8Test6Param,
|
SSE2, Loop8Test6Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
|
make_tuple(&vpx_lpf_horizontal_8_sse2,
|
||||||
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
|
&vpx_lpf_horizontal_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
|
make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
|
||||||
make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
|
&vpx_lpf_horizontal_edge_8_c, 8),
|
||||||
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1),
|
make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
|
||||||
make_tuple(&wrapper_vertical_16_dual_sse2,
|
&vpx_lpf_horizontal_edge_16_c, 8),
|
||||||
&wrapper_vertical_16_dual_c, 8, 1)));
|
make_tuple(&vpx_lpf_vertical_8_sse2,
|
||||||
|
&vpx_lpf_vertical_8_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_16_sse2,
|
||||||
|
&vpx_lpf_vertical_16_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_16_dual_sse2,
|
||||||
|
&vpx_lpf_vertical_16_dual_c, 8)));
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -600,9 +516,10 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
AVX2, Loop8Test6Param,
|
AVX2, Loop8Test6Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
|
make_tuple(&vpx_lpf_horizontal_edge_8_avx2,
|
||||||
make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
|
&vpx_lpf_horizontal_edge_8_c, 8),
|
||||||
2)));
|
make_tuple(&vpx_lpf_horizontal_edge_16_avx2,
|
||||||
|
&vpx_lpf_horizontal_edge_16_c, 8)));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_SSE2
|
#if HAVE_SSE2
|
||||||
@ -659,23 +576,23 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
#if HAVE_NEON_ASM
|
#if HAVE_NEON_ASM
|
||||||
// Using #if inside the macro is unsupported on MSVS but the tests are not
|
// Using #if inside the macro is unsupported on MSVS but the tests are not
|
||||||
// currently built for MSVS with ARM and NEON.
|
// currently built for MSVS with ARM and NEON.
|
||||||
make_tuple(&vpx_lpf_horizontal_16_neon,
|
make_tuple(&vpx_lpf_horizontal_edge_8_neon,
|
||||||
&vpx_lpf_horizontal_16_c, 8, 1),
|
&vpx_lpf_horizontal_edge_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_16_neon,
|
make_tuple(&vpx_lpf_horizontal_edge_16_neon,
|
||||||
&vpx_lpf_horizontal_16_c, 8, 2),
|
&vpx_lpf_horizontal_edge_16_c, 8),
|
||||||
make_tuple(&wrapper_vertical_16_neon,
|
make_tuple(&vpx_lpf_vertical_16_neon,
|
||||||
&wrapper_vertical_16_c, 8, 1),
|
&vpx_lpf_vertical_16_c, 8),
|
||||||
make_tuple(&wrapper_vertical_16_dual_neon,
|
make_tuple(&vpx_lpf_vertical_16_dual_neon,
|
||||||
&wrapper_vertical_16_dual_c, 8, 1),
|
&vpx_lpf_vertical_16_dual_c, 8),
|
||||||
#endif // HAVE_NEON_ASM
|
#endif // HAVE_NEON_ASM
|
||||||
make_tuple(&vpx_lpf_horizontal_8_neon,
|
make_tuple(&vpx_lpf_horizontal_8_neon,
|
||||||
&vpx_lpf_horizontal_8_c, 8, 1),
|
&vpx_lpf_horizontal_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_8_neon,
|
make_tuple(&vpx_lpf_vertical_8_neon,
|
||||||
&vpx_lpf_vertical_8_c, 8, 1),
|
&vpx_lpf_vertical_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_4_neon,
|
make_tuple(&vpx_lpf_horizontal_4_neon,
|
||||||
&vpx_lpf_horizontal_4_c, 8, 1),
|
&vpx_lpf_horizontal_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_4_neon,
|
make_tuple(&vpx_lpf_vertical_4_neon,
|
||||||
&vpx_lpf_vertical_4_c, 8, 1)));
|
&vpx_lpf_vertical_4_c, 8)));
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
NEON, Loop8Test9Param,
|
NEON, Loop8Test9Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
@ -692,15 +609,58 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
#endif // HAVE_NEON
|
#endif // HAVE_NEON
|
||||||
|
|
||||||
|
#if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
DSPR2, Loop8Test6Param,
|
||||||
|
::testing::Values(
|
||||||
|
make_tuple(&vpx_lpf_horizontal_4_dspr2,
|
||||||
|
&vpx_lpf_horizontal_4_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_horizontal_8_dspr2,
|
||||||
|
&vpx_lpf_horizontal_8_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_horizontal_edge_8,
|
||||||
|
&vpx_lpf_horizontal_edge_8, 8),
|
||||||
|
make_tuple(&vpx_lpf_horizontal_edge_16,
|
||||||
|
&vpx_lpf_horizontal_edge_16, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_4_dspr2,
|
||||||
|
&vpx_lpf_vertical_4_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_8_dspr2,
|
||||||
|
&vpx_lpf_vertical_8_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_16_dspr2,
|
||||||
|
&vpx_lpf_vertical_16_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_16_dual_dspr2,
|
||||||
|
&vpx_lpf_vertical_16_dual_c, 8)));
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
DSPR2, Loop8Test9Param,
|
||||||
|
::testing::Values(
|
||||||
|
make_tuple(&vpx_lpf_horizontal_4_dual_dspr2,
|
||||||
|
&vpx_lpf_horizontal_4_dual_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_horizontal_8_dual_dspr2,
|
||||||
|
&vpx_lpf_horizontal_8_dual_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_4_dual_dspr2,
|
||||||
|
&vpx_lpf_vertical_4_dual_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_8_dual_dspr2,
|
||||||
|
&vpx_lpf_vertical_8_dual_c, 8)));
|
||||||
|
#endif // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
|
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
MSA, Loop8Test6Param,
|
MSA, Loop8Test6Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
|
make_tuple(&vpx_lpf_horizontal_4_msa,
|
||||||
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
|
&vpx_lpf_horizontal_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
|
make_tuple(&vpx_lpf_horizontal_8_msa,
|
||||||
make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
|
&vpx_lpf_horizontal_8_c, 8),
|
||||||
make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
|
make_tuple(&vpx_lpf_horizontal_edge_8_msa,
|
||||||
|
&vpx_lpf_horizontal_edge_8_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_horizontal_edge_16_msa,
|
||||||
|
&vpx_lpf_horizontal_edge_16_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_4_msa,
|
||||||
|
&vpx_lpf_vertical_4_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_8_msa,
|
||||||
|
&vpx_lpf_vertical_8_c, 8),
|
||||||
|
make_tuple(&vpx_lpf_vertical_16_msa,
|
||||||
|
&vpx_lpf_vertical_16_c, 8)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
MSA, Loop8Test9Param,
|
MSA, Loop8Test9Param,
|
||||||
|
@ -331,7 +331,6 @@ static void filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
||||||
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
||||||
|
|
||||||
// TODO(yunqingwang): count in loopfilter functions should be removed.
|
|
||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
||||||
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
||||||
@ -352,11 +351,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr);
|
lfi1->hev_thr);
|
||||||
} else if (mask_8x8_0 & 1) {
|
} else if (mask_8x8_0 & 1) {
|
||||||
vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
|
vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
|
||||||
1);
|
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr, 1);
|
lfi1->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,11 +364,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr);
|
lfi1->hev_thr);
|
||||||
} else if (mask_4x4_0 & 1) {
|
} else if (mask_4x4_0 & 1) {
|
||||||
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
|
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
|
||||||
1);
|
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr, 1);
|
lfi1->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -381,10 +378,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi1->hev_thr);
|
lfi1->hev_thr);
|
||||||
} else if (mask_4x4_int_0 & 1) {
|
} else if (mask_4x4_int_0 & 1) {
|
||||||
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
||||||
lfi0->hev_thr, 1);
|
lfi0->hev_thr);
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
|
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr, 1);
|
lfi1->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -431,7 +428,6 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
||||||
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
||||||
|
|
||||||
// TODO(yunqingwang): count in loopfilter functions should be removed.
|
|
||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
||||||
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
||||||
@ -453,10 +449,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi1->hev_thr, bd);
|
lfi1->hev_thr, bd);
|
||||||
} else if (mask_8x8_0 & 1) {
|
} else if (mask_8x8_0 & 1) {
|
||||||
vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
|
vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
|
||||||
lfi0->hev_thr, 1, bd);
|
lfi0->hev_thr, bd);
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
|
vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
|
||||||
lfi1->lim, lfi1->hev_thr, 1, bd);
|
lfi1->lim, lfi1->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -467,10 +463,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi1->hev_thr, bd);
|
lfi1->hev_thr, bd);
|
||||||
} else if (mask_4x4_0 & 1) {
|
} else if (mask_4x4_0 & 1) {
|
||||||
vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
|
vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
|
||||||
lfi0->hev_thr, 1, bd);
|
lfi0->hev_thr, bd);
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
|
vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
|
||||||
lfi1->lim, lfi1->hev_thr, 1, bd);
|
lfi1->lim, lfi1->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -481,10 +477,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi1->hev_thr, bd);
|
lfi1->hev_thr, bd);
|
||||||
} else if (mask_4x4_int_0 & 1) {
|
} else if (mask_4x4_int_0 & 1) {
|
||||||
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
||||||
lfi0->hev_thr, 1, bd);
|
lfi0->hev_thr, bd);
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
|
vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
|
||||||
lfi1->lim, lfi1->hev_thr, 1, bd);
|
lfi1->lim, lfi1->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -521,12 +517,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
if (mask_16x16 & 1) {
|
if (mask_16x16 & 1) {
|
||||||
if ((mask_16x16 & 3) == 3) {
|
if ((mask_16x16 & 3) == 3) {
|
||||||
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 2);
|
lfi->hev_thr);
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
if ((mask_8x8 & 3) == 3) {
|
if ((mask_8x8 & 3) == 3) {
|
||||||
@ -544,18 +540,18 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
else if (mask_4x4_int & 2)
|
else if (mask_4x4_int & 2)
|
||||||
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1);
|
lfin->lim, lfin->hev_thr);
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
|
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
if ((mask_4x4 & 3) == 3) {
|
if ((mask_4x4 & 3) == 3) {
|
||||||
@ -572,22 +568,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
else if (mask_4x4_int & 2)
|
else if (mask_4x4_int & 2)
|
||||||
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1);
|
lfin->lim, lfin->hev_thr);
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
|
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_4x4_int & 1) {
|
} else if (mask_4x4_int & 1) {
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s += 8 * count;
|
s += 8 * count;
|
||||||
@ -618,12 +614,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
if (mask_16x16 & 1) {
|
if (mask_16x16 & 1) {
|
||||||
if ((mask_16x16 & 3) == 3) {
|
if ((mask_16x16 & 3) == 3) {
|
||||||
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 2, bd);
|
lfi->hev_thr, bd);
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
if ((mask_8x8 & 3) == 3) {
|
if ((mask_8x8 & 3) == 3) {
|
||||||
@ -642,20 +638,20 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1) {
|
if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
||||||
lfi->lim, lfi->hev_thr, 1, bd);
|
lfi->lim, lfi->hev_thr, bd);
|
||||||
} else if (mask_4x4_int & 2) {
|
} else if (mask_4x4_int & 2) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1, bd);
|
lfin->lim, lfin->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
|
|
||||||
if (mask_4x4_int & 1) {
|
if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
||||||
lfi->lim, lfi->hev_thr, 1, bd);
|
lfi->lim, lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
@ -674,25 +670,25 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1) {
|
if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
||||||
lfi->lim, lfi->hev_thr, 1, bd);
|
lfi->lim, lfi->hev_thr, bd);
|
||||||
} else if (mask_4x4_int & 2) {
|
} else if (mask_4x4_int & 2) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1, bd);
|
lfin->lim, lfin->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
|
|
||||||
if (mask_4x4_int & 1) {
|
if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
||||||
lfi->lim, lfi->hev_thr, 1, bd);
|
lfi->lim, lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (mask_4x4_int & 1) {
|
} else if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s += 8 * count;
|
s += 8 * count;
|
||||||
@ -1152,13 +1148,13 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
|
|||||||
if (mask_16x16 & 1) {
|
if (mask_16x16 & 1) {
|
||||||
vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
s += 8;
|
s += 8;
|
||||||
lfl += 1;
|
lfl += 1;
|
||||||
mask_16x16 >>= 1;
|
mask_16x16 >>= 1;
|
||||||
@ -1188,15 +1184,15 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
|
|||||||
lfi->hev_thr, bd);
|
lfi->hev_thr, bd);
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
s += 8;
|
s += 8;
|
||||||
lfl += 1;
|
lfl += 1;
|
||||||
mask_16x16 >>= 1;
|
mask_16x16 >>= 1;
|
||||||
|
@ -2496,8 +2496,9 @@ static void resize_mv_buffer(VP10_COMMON *cm) {
|
|||||||
vpx_free(cm->cur_frame->mvs);
|
vpx_free(cm->cur_frame->mvs);
|
||||||
cm->cur_frame->mi_rows = cm->mi_rows;
|
cm->cur_frame->mi_rows = cm->mi_rows;
|
||||||
cm->cur_frame->mi_cols = cm->mi_cols;
|
cm->cur_frame->mi_cols = cm->mi_cols;
|
||||||
cm->cur_frame->mvs = (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
|
CHECK_MEM_ERROR(cm, cm->cur_frame->mvs,
|
||||||
sizeof(*cm->cur_frame->mvs));
|
(MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
|
||||||
|
sizeof(*cm->cur_frame->mvs)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void resize_context_buffers(VP10_COMMON *cm, int width, int height) {
|
static void resize_context_buffers(VP10_COMMON *cm, int width, int height) {
|
||||||
|
@ -64,13 +64,13 @@ CYCLIC_REFRESH *vp10_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
|
|||||||
|
|
||||||
cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
|
cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
|
||||||
if (cr->map == NULL) {
|
if (cr->map == NULL) {
|
||||||
vpx_free(cr);
|
vp10_cyclic_refresh_free(cr);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
|
last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
|
||||||
cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
|
cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
|
||||||
if (cr->last_coded_q_map == NULL) {
|
if (cr->last_coded_q_map == NULL) {
|
||||||
vpx_free(cr);
|
vp10_cyclic_refresh_free(cr);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
assert(MAXQ <= 255);
|
assert(MAXQ <= 255);
|
||||||
|
@ -1788,8 +1788,9 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (cpi->b_calculate_consistency) {
|
if (cpi->b_calculate_consistency) {
|
||||||
cpi->ssim_vars = vpx_malloc(sizeof(*cpi->ssim_vars) *
|
CHECK_MEM_ERROR(cm, cpi->ssim_vars,
|
||||||
4 * cpi->common.mi_rows * cpi->common.mi_cols);
|
vpx_malloc(sizeof(*cpi->ssim_vars) * 4 *
|
||||||
|
cpi->common.mi_rows * cpi->common.mi_cols));
|
||||||
cpi->worst_consistency = 100.0;
|
cpi->worst_consistency = 100.0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -2611,16 +2612,16 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
|
|||||||
vpx_extend_frame_inner_borders(cm->frame_to_show);
|
vpx_extend_frame_inner_borders(cm->frame_to_show);
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void alloc_frame_mvs(const VP10_COMMON *cm,
|
static INLINE void alloc_frame_mvs(VP10_COMMON *const cm,
|
||||||
int buffer_idx) {
|
int buffer_idx) {
|
||||||
RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
|
RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
|
||||||
if (new_fb_ptr->mvs == NULL ||
|
if (new_fb_ptr->mvs == NULL ||
|
||||||
new_fb_ptr->mi_rows < cm->mi_rows ||
|
new_fb_ptr->mi_rows < cm->mi_rows ||
|
||||||
new_fb_ptr->mi_cols < cm->mi_cols) {
|
new_fb_ptr->mi_cols < cm->mi_cols) {
|
||||||
vpx_free(new_fb_ptr->mvs);
|
vpx_free(new_fb_ptr->mvs);
|
||||||
new_fb_ptr->mvs =
|
CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
|
||||||
(MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
|
(MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
|
||||||
sizeof(*new_fb_ptr->mvs));
|
sizeof(*new_fb_ptr->mvs)));
|
||||||
new_fb_ptr->mi_rows = cm->mi_rows;
|
new_fb_ptr->mi_rows = cm->mi_rows;
|
||||||
new_fb_ptr->mi_cols = cm->mi_cols;
|
new_fb_ptr->mi_cols = cm->mi_cols;
|
||||||
}
|
}
|
||||||
@ -2667,12 +2668,13 @@ void vp10_scale_references(VP10_COMP *cpi) {
|
|||||||
if (force_scaling ||
|
if (force_scaling ||
|
||||||
new_fb_ptr->buf.y_crop_width != cm->width ||
|
new_fb_ptr->buf.y_crop_width != cm->width ||
|
||||||
new_fb_ptr->buf.y_crop_height != cm->height) {
|
new_fb_ptr->buf.y_crop_height != cm->height) {
|
||||||
vpx_realloc_frame_buffer(&new_fb_ptr->buf,
|
if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
|
||||||
cm->width, cm->height,
|
|
||||||
cm->subsampling_x, cm->subsampling_y,
|
cm->subsampling_x, cm->subsampling_y,
|
||||||
cm->use_highbitdepth,
|
cm->use_highbitdepth,
|
||||||
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
|
VP9_ENC_BORDER_IN_PIXELS,
|
||||||
NULL, NULL, NULL);
|
cm->byte_alignment, NULL, NULL, NULL))
|
||||||
|
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate frame buffer");
|
||||||
scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
|
scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
|
||||||
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
|
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
|
||||||
alloc_frame_mvs(cm, new_fb);
|
alloc_frame_mvs(cm, new_fb);
|
||||||
@ -2692,11 +2694,12 @@ void vp10_scale_references(VP10_COMP *cpi) {
|
|||||||
if (force_scaling ||
|
if (force_scaling ||
|
||||||
new_fb_ptr->buf.y_crop_width != cm->width ||
|
new_fb_ptr->buf.y_crop_width != cm->width ||
|
||||||
new_fb_ptr->buf.y_crop_height != cm->height) {
|
new_fb_ptr->buf.y_crop_height != cm->height) {
|
||||||
vpx_realloc_frame_buffer(&new_fb_ptr->buf,
|
if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
|
||||||
cm->width, cm->height,
|
|
||||||
cm->subsampling_x, cm->subsampling_y,
|
cm->subsampling_x, cm->subsampling_y,
|
||||||
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
|
VP9_ENC_BORDER_IN_PIXELS,
|
||||||
NULL, NULL, NULL);
|
cm->byte_alignment, NULL, NULL, NULL))
|
||||||
|
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate frame buffer");
|
||||||
scale_and_extend_frame(ref, &new_fb_ptr->buf);
|
scale_and_extend_frame(ref, &new_fb_ptr->buf);
|
||||||
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
|
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
|
||||||
alloc_frame_mvs(cm, new_fb);
|
alloc_frame_mvs(cm, new_fb);
|
||||||
@ -2993,14 +2996,15 @@ static void set_frame_size(VP10_COMP *cpi) {
|
|||||||
alloc_frame_mvs(cm, cm->new_fb_idx);
|
alloc_frame_mvs(cm, cm->new_fb_idx);
|
||||||
|
|
||||||
// Reset the frame pointers to the current frame size.
|
// Reset the frame pointers to the current frame size.
|
||||||
vpx_realloc_frame_buffer(get_frame_new_buffer(cm),
|
if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
|
||||||
cm->width, cm->height,
|
|
||||||
cm->subsampling_x, cm->subsampling_y,
|
cm->subsampling_x, cm->subsampling_y,
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
cm->use_highbitdepth,
|
cm->use_highbitdepth,
|
||||||
#endif
|
#endif
|
||||||
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
|
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL))
|
||||||
|
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate frame buffer");
|
||||||
|
|
||||||
alloc_util_frame_buffers(cpi);
|
alloc_util_frame_buffers(cpi);
|
||||||
init_motion_estimation(cpi);
|
init_motion_estimation(cpi);
|
||||||
@ -3816,12 +3820,14 @@ static void setup_denoiser_buffer(VP10_COMP *cpi) {
|
|||||||
VP10_COMMON *const cm = &cpi->common;
|
VP10_COMMON *const cm = &cpi->common;
|
||||||
if (cpi->oxcf.noise_sensitivity > 0 &&
|
if (cpi->oxcf.noise_sensitivity > 0 &&
|
||||||
!cpi->denoiser.frame_buffer_initialized) {
|
!cpi->denoiser.frame_buffer_initialized) {
|
||||||
vp10_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
|
if (vp10_denoiser_alloc(&cpi->denoiser, cm->width, cm->height,
|
||||||
cm->subsampling_x, cm->subsampling_y,
|
cm->subsampling_x, cm->subsampling_y,
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
cm->use_highbitdepth,
|
cm->use_highbitdepth,
|
||||||
#endif
|
#endif
|
||||||
VP9_ENC_BORDER_IN_PIXELS);
|
VP9_ENC_BORDER_IN_PIXELS))
|
||||||
|
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate denoiser");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -3829,21 +3835,15 @@ static void setup_denoiser_buffer(VP10_COMP *cpi) {
|
|||||||
int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags,
|
int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags,
|
||||||
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
|
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
|
||||||
int64_t end_time) {
|
int64_t end_time) {
|
||||||
VP10_COMMON *volatile const cm = &cpi->common;
|
VP10_COMMON *const cm = &cpi->common;
|
||||||
struct vpx_usec_timer timer;
|
struct vpx_usec_timer timer;
|
||||||
volatile int res = 0;
|
int res = 0;
|
||||||
const int subsampling_x = sd->subsampling_x;
|
const int subsampling_x = sd->subsampling_x;
|
||||||
const int subsampling_y = sd->subsampling_y;
|
const int subsampling_y = sd->subsampling_y;
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
|
const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (setjmp(cm->error.jmp)) {
|
|
||||||
cm->error.setjmp = 0;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
cm->error.setjmp = 1;
|
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
|
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
|
||||||
#else
|
#else
|
||||||
@ -3877,7 +3877,6 @@ int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags,
|
|||||||
res = -1;
|
res = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
cm->error.setjmp = 0;
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,6 +461,7 @@ static void resize_multistep(const uint8_t *const input,
|
|||||||
int filteredlength = length;
|
int filteredlength = length;
|
||||||
if (!tmpbuf) {
|
if (!tmpbuf) {
|
||||||
tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length);
|
tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length);
|
||||||
|
if (tmpbuf == NULL) return;
|
||||||
otmp = tmpbuf;
|
otmp = tmpbuf;
|
||||||
} else {
|
} else {
|
||||||
otmp = buf;
|
otmp = buf;
|
||||||
@ -520,6 +521,7 @@ void vp10_resize_plane(const uint8_t *const input,
|
|||||||
uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
|
uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
|
||||||
(width < height ? height : width));
|
(width < height ? height : width));
|
||||||
uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2));
|
uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2));
|
||||||
|
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL) goto Error;
|
||||||
assert(width > 0);
|
assert(width > 0);
|
||||||
assert(height > 0);
|
assert(height > 0);
|
||||||
assert(width2 > 0);
|
assert(width2 > 0);
|
||||||
@ -532,6 +534,8 @@ void vp10_resize_plane(const uint8_t *const input,
|
|||||||
resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf);
|
resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf);
|
||||||
fill_arr_to_col(output + i, out_stride, height2, arrbuf + height);
|
fill_arr_to_col(output + i, out_stride, height2, arrbuf + height);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Error:
|
||||||
free(intbuf);
|
free(intbuf);
|
||||||
free(tmpbuf);
|
free(tmpbuf);
|
||||||
free(arrbuf);
|
free(arrbuf);
|
||||||
@ -754,6 +758,7 @@ static void highbd_resize_multistep(const uint16_t *const input,
|
|||||||
int filteredlength = length;
|
int filteredlength = length;
|
||||||
if (!tmpbuf) {
|
if (!tmpbuf) {
|
||||||
tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length);
|
tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length);
|
||||||
|
if (tmpbuf == NULL) return;
|
||||||
otmp = tmpbuf;
|
otmp = tmpbuf;
|
||||||
} else {
|
} else {
|
||||||
otmp = buf;
|
otmp = buf;
|
||||||
@ -816,6 +821,7 @@ void vp10_highbd_resize_plane(const uint8_t *const input,
|
|||||||
uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
|
uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
|
||||||
(width < height ? height : width));
|
(width < height ? height : width));
|
||||||
uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2));
|
uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2));
|
||||||
|
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL) goto Error;
|
||||||
for (i = 0; i < height; ++i) {
|
for (i = 0; i < height; ++i) {
|
||||||
highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
|
highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
|
||||||
intbuf + width2 * i, width2, tmpbuf, bd);
|
intbuf + width2 * i, width2, tmpbuf, bd);
|
||||||
@ -827,6 +833,8 @@ void vp10_highbd_resize_plane(const uint8_t *const input,
|
|||||||
highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
|
highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
|
||||||
arrbuf + height);
|
arrbuf + height);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Error:
|
||||||
free(intbuf);
|
free(intbuf);
|
||||||
free(tmpbuf);
|
free(tmpbuf);
|
||||||
free(arrbuf);
|
free(arrbuf);
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
#include "./vpx_config.h"
|
#include "./vpx_config.h"
|
||||||
#include "vpx/vpx_encoder.h"
|
#include "vpx/vpx_encoder.h"
|
||||||
#include "vpx_ports/vpx_once.h"
|
#include "vpx_ports/vpx_once.h"
|
||||||
|
#include "vpx_ports/system_state.h"
|
||||||
#include "vpx/internal/vpx_codec_internal.h"
|
#include "vpx/internal/vpx_codec_internal.h"
|
||||||
#include "./vpx_version.h"
|
#include "./vpx_version.h"
|
||||||
#include "vp10/encoder/encoder.h"
|
#include "vp10/encoder/encoder.h"
|
||||||
@ -873,18 +874,21 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
const vpx_image_t *img,
|
const vpx_image_t *img,
|
||||||
vpx_codec_pts_t pts,
|
vpx_codec_pts_t pts,
|
||||||
unsigned long duration,
|
unsigned long duration,
|
||||||
vpx_enc_frame_flags_t flags,
|
vpx_enc_frame_flags_t enc_flags,
|
||||||
unsigned long deadline) {
|
unsigned long deadline) {
|
||||||
vpx_codec_err_t res = VPX_CODEC_OK;
|
volatile vpx_codec_err_t res = VPX_CODEC_OK;
|
||||||
|
volatile vpx_enc_frame_flags_t flags = enc_flags;
|
||||||
VP10_COMP *const cpi = ctx->cpi;
|
VP10_COMP *const cpi = ctx->cpi;
|
||||||
const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
|
const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
|
||||||
size_t data_sz;
|
size_t data_sz;
|
||||||
|
|
||||||
|
if (cpi == NULL) return VPX_CODEC_INVALID_PARAM;
|
||||||
|
|
||||||
if (img != NULL) {
|
if (img != NULL) {
|
||||||
res = validate_img(ctx, img);
|
res = validate_img(ctx, img);
|
||||||
// TODO(jzern) the checks related to cpi's validity should be treated as a
|
// TODO(jzern) the checks related to cpi's validity should be treated as a
|
||||||
// failure condition, encoder setup is done fully in init() currently.
|
// failure condition, encoder setup is done fully in init() currently.
|
||||||
if (res == VPX_CODEC_OK && cpi != NULL) {
|
if (res == VPX_CODEC_OK) {
|
||||||
// There's no codec control for multiple alt-refs so check the encoder
|
// There's no codec control for multiple alt-refs so check the encoder
|
||||||
// instance for its status to determine the compressed data size.
|
// instance for its status to determine the compressed data size.
|
||||||
data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
|
data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
|
||||||
@ -912,6 +916,14 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
return VPX_CODEC_INVALID_PARAM;
|
return VPX_CODEC_INVALID_PARAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (setjmp(cpi->common.error.jmp)) {
|
||||||
|
cpi->common.error.setjmp = 0;
|
||||||
|
res = update_error_state(ctx, &cpi->common.error);
|
||||||
|
vpx_clear_system_state();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
cpi->common.error.setjmp = 1;
|
||||||
|
|
||||||
vp10_apply_encoding_flags(cpi, flags);
|
vp10_apply_encoding_flags(cpi, flags);
|
||||||
|
|
||||||
// Handle fixed keyframe intervals
|
// Handle fixed keyframe intervals
|
||||||
@ -923,8 +935,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the encoder instance on the first frame.
|
if (res == VPX_CODEC_OK) {
|
||||||
if (res == VPX_CODEC_OK && cpi != NULL) {
|
|
||||||
unsigned int lib_flags = 0;
|
unsigned int lib_flags = 0;
|
||||||
YV12_BUFFER_CONFIG sd;
|
YV12_BUFFER_CONFIG sd;
|
||||||
int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
|
int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
|
||||||
@ -963,7 +974,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
* the buffer size anyway.
|
* the buffer size anyway.
|
||||||
*/
|
*/
|
||||||
if (cx_data_sz < ctx->cx_data_sz / 2) {
|
if (cx_data_sz < ctx->cx_data_sz / 2) {
|
||||||
ctx->base.err_detail = "Compressed data buffer too small";
|
vpx_internal_error(&cpi->common.error, VPX_CODEC_ERROR,
|
||||||
|
"Compressed data buffer too small");
|
||||||
return VPX_CODEC_ERROR;
|
return VPX_CODEC_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1040,6 +1052,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cpi->common.error.setjmp = 0;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -440,6 +440,11 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
|
|||||||
denoiser->yv12_last_source.frame_size);
|
denoiser->yv12_last_source.frame_size);
|
||||||
|
|
||||||
denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
|
denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
|
||||||
|
if (!denoiser->denoise_state)
|
||||||
|
{
|
||||||
|
vp8_denoiser_free(denoiser);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
|
memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
|
||||||
vp8_denoiser_set_parameters(denoiser, mode);
|
vp8_denoiser_set_parameters(denoiser, mode);
|
||||||
denoiser->nmse_source_diff = 0;
|
denoiser->nmse_source_diff = 0;
|
||||||
|
@ -1318,9 +1318,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
|
|||||||
#if CONFIG_TEMPORAL_DENOISING
|
#if CONFIG_TEMPORAL_DENOISING
|
||||||
if (cpi->oxcf.noise_sensitivity > 0) {
|
if (cpi->oxcf.noise_sensitivity > 0) {
|
||||||
vp8_denoiser_free(&cpi->denoiser);
|
vp8_denoiser_free(&cpi->denoiser);
|
||||||
vp8_denoiser_allocate(&cpi->denoiser, width, height,
|
if (vp8_denoiser_allocate(&cpi->denoiser, width, height,
|
||||||
cm->mb_rows, cm->mb_cols,
|
cm->mb_rows, cm->mb_cols,
|
||||||
cpi->oxcf.noise_sensitivity);
|
cpi->oxcf.noise_sensitivity))
|
||||||
|
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate denoiser");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -1832,9 +1834,11 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
|||||||
{
|
{
|
||||||
int width = (cpi->oxcf.Width + 15) & ~15;
|
int width = (cpi->oxcf.Width + 15) & ~15;
|
||||||
int height = (cpi->oxcf.Height + 15) & ~15;
|
int height = (cpi->oxcf.Height + 15) & ~15;
|
||||||
vp8_denoiser_allocate(&cpi->denoiser, width, height,
|
if (vp8_denoiser_allocate(&cpi->denoiser, width, height,
|
||||||
cm->mb_rows, cm->mb_cols,
|
cm->mb_rows, cm->mb_cols,
|
||||||
cpi->oxcf.noise_sensitivity);
|
cpi->oxcf.noise_sensitivity))
|
||||||
|
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate denoiser");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -67,10 +67,11 @@ struct vpx_codec_alg_priv
|
|||||||
FRAGMENT_DATA fragments;
|
FRAGMENT_DATA fragments;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void vp8_init_ctx(vpx_codec_ctx_t *ctx)
|
static int vp8_init_ctx(vpx_codec_ctx_t *ctx)
|
||||||
{
|
{
|
||||||
vpx_codec_alg_priv_t *priv =
|
vpx_codec_alg_priv_t *priv =
|
||||||
(vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv));
|
(vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv));
|
||||||
|
if (!priv) return 1;
|
||||||
|
|
||||||
ctx->priv = (vpx_codec_priv_t *)priv;
|
ctx->priv = (vpx_codec_priv_t *)priv;
|
||||||
ctx->priv->init_flags = ctx->init_flags;
|
ctx->priv->init_flags = ctx->init_flags;
|
||||||
@ -85,6 +86,8 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx)
|
|||||||
priv->cfg = *ctx->config.dec;
|
priv->cfg = *ctx->config.dec;
|
||||||
ctx->config.dec = &priv->cfg;
|
ctx->config.dec = &priv->cfg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
|
static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
|
||||||
@ -103,7 +106,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
|
|||||||
* information becomes known.
|
* information becomes known.
|
||||||
*/
|
*/
|
||||||
if (!ctx->priv) {
|
if (!ctx->priv) {
|
||||||
vp8_init_ctx(ctx);
|
if (vp8_init_ctx(ctx)) return VPX_CODEC_MEM_ERROR;
|
||||||
priv = (vpx_codec_alg_priv_t *)ctx->priv;
|
priv = (vpx_codec_alg_priv_t *)ctx->priv;
|
||||||
|
|
||||||
/* initialize number of fragments to zero */
|
/* initialize number of fragments to zero */
|
||||||
|
@ -728,10 +728,8 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
|
static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
|
||||||
// TODO(aconverse): model[PIVOT_NODE] should never be zero.
|
assert(p != 0);
|
||||||
// https://code.google.com/p/webm/issues/detail?id=1089
|
memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob));
|
||||||
memcpy(probs, vp9_pareto8_full[p == 0 ? 254 : p - 1],
|
|
||||||
MODEL_NODES * sizeof(vpx_prob));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) {
|
void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) {
|
||||||
|
@ -324,7 +324,6 @@ static void filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
||||||
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
||||||
|
|
||||||
// TODO(yunqingwang): count in loopfilter functions should be removed.
|
|
||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
||||||
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
||||||
@ -345,11 +344,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr);
|
lfi1->hev_thr);
|
||||||
} else if (mask_8x8_0 & 1) {
|
} else if (mask_8x8_0 & 1) {
|
||||||
vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
|
vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
|
||||||
1);
|
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr, 1);
|
lfi1->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -359,11 +357,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr);
|
lfi1->hev_thr);
|
||||||
} else if (mask_4x4_0 & 1) {
|
} else if (mask_4x4_0 & 1) {
|
||||||
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
|
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
|
||||||
1);
|
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr, 1);
|
lfi1->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -374,10 +371,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi1->hev_thr);
|
lfi1->hev_thr);
|
||||||
} else if (mask_4x4_int_0 & 1) {
|
} else if (mask_4x4_int_0 & 1) {
|
||||||
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
||||||
lfi0->hev_thr, 1);
|
lfi0->hev_thr);
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
|
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
|
||||||
lfi1->hev_thr, 1);
|
lfi1->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -424,7 +421,6 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
||||||
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
||||||
|
|
||||||
// TODO(yunqingwang): count in loopfilter functions should be removed.
|
|
||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
||||||
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
||||||
@ -446,10 +442,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi1->hev_thr, bd);
|
lfi1->hev_thr, bd);
|
||||||
} else if (mask_8x8_0 & 1) {
|
} else if (mask_8x8_0 & 1) {
|
||||||
vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
|
vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
|
||||||
lfi0->hev_thr, 1, bd);
|
lfi0->hev_thr, bd);
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
|
vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
|
||||||
lfi1->lim, lfi1->hev_thr, 1, bd);
|
lfi1->lim, lfi1->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -460,10 +456,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi1->hev_thr, bd);
|
lfi1->hev_thr, bd);
|
||||||
} else if (mask_4x4_0 & 1) {
|
} else if (mask_4x4_0 & 1) {
|
||||||
vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
|
vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
|
||||||
lfi0->hev_thr, 1, bd);
|
lfi0->hev_thr, bd);
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
|
vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
|
||||||
lfi1->lim, lfi1->hev_thr, 1, bd);
|
lfi1->lim, lfi1->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -474,10 +470,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
|||||||
lfi1->hev_thr, bd);
|
lfi1->hev_thr, bd);
|
||||||
} else if (mask_4x4_int_0 & 1) {
|
} else if (mask_4x4_int_0 & 1) {
|
||||||
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
||||||
lfi0->hev_thr, 1, bd);
|
lfi0->hev_thr, bd);
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
|
vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
|
||||||
lfi1->lim, lfi1->hev_thr, 1, bd);
|
lfi1->lim, lfi1->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -514,12 +510,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
if (mask_16x16 & 1) {
|
if (mask_16x16 & 1) {
|
||||||
if ((mask_16x16 & 3) == 3) {
|
if ((mask_16x16 & 3) == 3) {
|
||||||
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 2);
|
lfi->hev_thr);
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
if ((mask_8x8 & 3) == 3) {
|
if ((mask_8x8 & 3) == 3) {
|
||||||
@ -537,18 +533,18 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
else if (mask_4x4_int & 2)
|
else if (mask_4x4_int & 2)
|
||||||
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1);
|
lfin->lim, lfin->hev_thr);
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
|
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
if ((mask_4x4 & 3) == 3) {
|
if ((mask_4x4 & 3) == 3) {
|
||||||
@ -565,22 +561,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
else if (mask_4x4_int & 2)
|
else if (mask_4x4_int & 2)
|
||||||
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1);
|
lfin->lim, lfin->hev_thr);
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
|
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_4x4_int & 1) {
|
} else if (mask_4x4_int & 1) {
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s += 8 * count;
|
s += 8 * count;
|
||||||
@ -611,12 +607,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||||||
if (mask & 1) {
|
if (mask & 1) {
|
||||||
if (mask_16x16 & 1) {
|
if (mask_16x16 & 1) {
|
||||||
if ((mask_16x16 & 3) == 3) {
|
if ((mask_16x16 & 3) == 3) {
|
||||||
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 2, bd);
|
lfi->hev_thr, bd);
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
if ((mask_8x8 & 3) == 3) {
|
if ((mask_8x8 & 3) == 3) {
|
||||||
@ -635,20 +631,20 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1) {
|
if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
||||||
lfi->lim, lfi->hev_thr, 1, bd);
|
lfi->lim, lfi->hev_thr, bd);
|
||||||
} else if (mask_4x4_int & 2) {
|
} else if (mask_4x4_int & 2) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1, bd);
|
lfin->lim, lfin->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
|
|
||||||
if (mask_4x4_int & 1) {
|
if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
||||||
lfi->lim, lfi->hev_thr, 1, bd);
|
lfi->lim, lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
@ -667,25 +663,25 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1) {
|
if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
||||||
lfi->lim, lfi->hev_thr, 1, bd);
|
lfi->lim, lfi->hev_thr, bd);
|
||||||
} else if (mask_4x4_int & 2) {
|
} else if (mask_4x4_int & 2) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1, bd);
|
lfin->lim, lfin->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
|
|
||||||
if (mask_4x4_int & 1) {
|
if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
|
||||||
lfi->lim, lfi->hev_thr, 1, bd);
|
lfi->lim, lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (mask_4x4_int & 1) {
|
} else if (mask_4x4_int & 1) {
|
||||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s += 8 * count;
|
s += 8 * count;
|
||||||
@ -1102,13 +1098,13 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
|
|||||||
if (mask_16x16 & 1) {
|
if (mask_16x16 & 1) {
|
||||||
vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
s += 8;
|
s += 8;
|
||||||
lfl += 1;
|
lfl += 1;
|
||||||
mask_16x16 >>= 1;
|
mask_16x16 >>= 1;
|
||||||
@ -1138,15 +1134,15 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
|
|||||||
lfi->hev_thr, bd);
|
lfi->hev_thr, bd);
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1, bd);
|
lfi->hev_thr, bd);
|
||||||
s += 8;
|
s += 8;
|
||||||
lfl += 1;
|
lfl += 1;
|
||||||
mask_16x16 >>= 1;
|
mask_16x16 >>= 1;
|
||||||
|
@ -1221,8 +1221,9 @@ static void resize_mv_buffer(VP9_COMMON *cm) {
|
|||||||
vpx_free(cm->cur_frame->mvs);
|
vpx_free(cm->cur_frame->mvs);
|
||||||
cm->cur_frame->mi_rows = cm->mi_rows;
|
cm->cur_frame->mi_rows = cm->mi_rows;
|
||||||
cm->cur_frame->mi_cols = cm->mi_cols;
|
cm->cur_frame->mi_cols = cm->mi_cols;
|
||||||
cm->cur_frame->mvs = (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
|
CHECK_MEM_ERROR(cm, cm->cur_frame->mvs,
|
||||||
sizeof(*cm->cur_frame->mvs));
|
(MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
|
||||||
|
sizeof(*cm->cur_frame->mvs)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
|
static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
|
||||||
|
@ -29,13 +29,13 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
|
|||||||
|
|
||||||
cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
|
cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
|
||||||
if (cr->map == NULL) {
|
if (cr->map == NULL) {
|
||||||
vpx_free(cr);
|
vp9_cyclic_refresh_free(cr);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
|
last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
|
||||||
cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
|
cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
|
||||||
if (cr->last_coded_q_map == NULL) {
|
if (cr->last_coded_q_map == NULL) {
|
||||||
vpx_free(cr);
|
vp9_cyclic_refresh_free(cr);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
assert(MAXQ <= 255);
|
assert(MAXQ <= 255);
|
||||||
@ -44,7 +44,7 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
|
|||||||
consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
|
consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
|
||||||
cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
|
cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
|
||||||
if (cr->consec_zero_mv == NULL) {
|
if (cr->consec_zero_mv == NULL) {
|
||||||
vpx_free(cr);
|
vp9_cyclic_refresh_free(cr);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
memset(cr->consec_zero_mv, 0, consec_zero_mv_size);
|
memset(cr->consec_zero_mv, 0, consec_zero_mv_size);
|
||||||
|
@ -12,9 +12,8 @@
|
|||||||
#include "vp9/encoder/vp9_cost.h"
|
#include "vp9/encoder/vp9_cost.h"
|
||||||
|
|
||||||
/* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT))
|
/* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT))
|
||||||
Begins and ends with a bogus entry to satisfy use of prob=0 in the firstpass.
|
Begins with a bogus entry for simpler addressing. */
|
||||||
https://code.google.com/p/webm/issues/detail?id=1089 */
|
const uint16_t vp9_prob_cost[256] = {
|
||||||
const uint16_t vp9_prob_cost[257] = {
|
|
||||||
4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325,
|
4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325,
|
||||||
2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780,
|
2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780,
|
||||||
1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470,
|
1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470,
|
||||||
@ -36,13 +35,14 @@ const uint16_t vp9_prob_cost[257] = {
|
|||||||
125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89,
|
125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89,
|
||||||
86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 51,
|
86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 51,
|
||||||
48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15,
|
48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15,
|
||||||
12, 9, 6, 3, 3};
|
12, 9, 6, 3};
|
||||||
|
|
||||||
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
|
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
|
||||||
int i, int c) {
|
int i, int c) {
|
||||||
const vpx_prob prob = probs[i / 2];
|
const vpx_prob prob = probs[i / 2];
|
||||||
int b;
|
int b;
|
||||||
|
|
||||||
|
assert(prob != 0);
|
||||||
for (b = 0; b <= 1; ++b) {
|
for (b = 0; b <= 1; ++b) {
|
||||||
const int cc = c + vp9_cost_bit(prob, b);
|
const int cc = c + vp9_cost_bit(prob, b);
|
||||||
const vpx_tree_index ii = tree[i + b];
|
const vpx_tree_index ii = tree[i + b];
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern const uint16_t vp9_prob_cost[257];
|
extern const uint16_t vp9_prob_cost[256];
|
||||||
|
|
||||||
// The factor to scale from cost in bits to cost in vp9_prob_cost units.
|
// The factor to scale from cost in bits to cost in vp9_prob_cost units.
|
||||||
#define VP9_PROB_COST_SHIFT 9
|
#define VP9_PROB_COST_SHIFT 9
|
||||||
|
@ -1766,8 +1766,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (cpi->b_calculate_consistency) {
|
if (cpi->b_calculate_consistency) {
|
||||||
cpi->ssim_vars = vpx_malloc(sizeof(*cpi->ssim_vars) *
|
CHECK_MEM_ERROR(cm, cpi->ssim_vars,
|
||||||
4 * cpi->common.mi_rows * cpi->common.mi_cols);
|
vpx_malloc(sizeof(*cpi->ssim_vars) * 4 *
|
||||||
|
cpi->common.mi_rows * cpi->common.mi_cols));
|
||||||
cpi->worst_consistency = 100.0;
|
cpi->worst_consistency = 100.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2631,16 +2632,16 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
|
|||||||
vpx_extend_frame_inner_borders(cm->frame_to_show);
|
vpx_extend_frame_inner_borders(cm->frame_to_show);
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void alloc_frame_mvs(const VP9_COMMON *cm,
|
static INLINE void alloc_frame_mvs(VP9_COMMON *const cm,
|
||||||
int buffer_idx) {
|
int buffer_idx) {
|
||||||
RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
|
RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
|
||||||
if (new_fb_ptr->mvs == NULL ||
|
if (new_fb_ptr->mvs == NULL ||
|
||||||
new_fb_ptr->mi_rows < cm->mi_rows ||
|
new_fb_ptr->mi_rows < cm->mi_rows ||
|
||||||
new_fb_ptr->mi_cols < cm->mi_cols) {
|
new_fb_ptr->mi_cols < cm->mi_cols) {
|
||||||
vpx_free(new_fb_ptr->mvs);
|
vpx_free(new_fb_ptr->mvs);
|
||||||
new_fb_ptr->mvs =
|
CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
|
||||||
(MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
|
(MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
|
||||||
sizeof(*new_fb_ptr->mvs));
|
sizeof(*new_fb_ptr->mvs)));
|
||||||
new_fb_ptr->mi_rows = cm->mi_rows;
|
new_fb_ptr->mi_rows = cm->mi_rows;
|
||||||
new_fb_ptr->mi_cols = cm->mi_cols;
|
new_fb_ptr->mi_cols = cm->mi_cols;
|
||||||
}
|
}
|
||||||
@ -2678,12 +2679,13 @@ void vp9_scale_references(VP9_COMP *cpi) {
|
|||||||
if (force_scaling ||
|
if (force_scaling ||
|
||||||
new_fb_ptr->buf.y_crop_width != cm->width ||
|
new_fb_ptr->buf.y_crop_width != cm->width ||
|
||||||
new_fb_ptr->buf.y_crop_height != cm->height) {
|
new_fb_ptr->buf.y_crop_height != cm->height) {
|
||||||
vpx_realloc_frame_buffer(&new_fb_ptr->buf,
|
if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
|
||||||
cm->width, cm->height,
|
|
||||||
cm->subsampling_x, cm->subsampling_y,
|
cm->subsampling_x, cm->subsampling_y,
|
||||||
cm->use_highbitdepth,
|
cm->use_highbitdepth,
|
||||||
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
|
VP9_ENC_BORDER_IN_PIXELS,
|
||||||
NULL, NULL, NULL);
|
cm->byte_alignment, NULL, NULL, NULL))
|
||||||
|
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate frame buffer");
|
||||||
scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
|
scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
|
||||||
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
|
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
|
||||||
alloc_frame_mvs(cm, new_fb);
|
alloc_frame_mvs(cm, new_fb);
|
||||||
@ -2703,11 +2705,12 @@ void vp9_scale_references(VP9_COMP *cpi) {
|
|||||||
if (force_scaling ||
|
if (force_scaling ||
|
||||||
new_fb_ptr->buf.y_crop_width != cm->width ||
|
new_fb_ptr->buf.y_crop_width != cm->width ||
|
||||||
new_fb_ptr->buf.y_crop_height != cm->height) {
|
new_fb_ptr->buf.y_crop_height != cm->height) {
|
||||||
vpx_realloc_frame_buffer(&new_fb_ptr->buf,
|
if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
|
||||||
cm->width, cm->height,
|
|
||||||
cm->subsampling_x, cm->subsampling_y,
|
cm->subsampling_x, cm->subsampling_y,
|
||||||
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
|
VP9_ENC_BORDER_IN_PIXELS,
|
||||||
NULL, NULL, NULL);
|
cm->byte_alignment, NULL, NULL, NULL))
|
||||||
|
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate frame buffer");
|
||||||
vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf);
|
vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf);
|
||||||
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
|
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
|
||||||
alloc_frame_mvs(cm, new_fb);
|
alloc_frame_mvs(cm, new_fb);
|
||||||
@ -2954,12 +2957,14 @@ static void setup_denoiser_buffer(VP9_COMP *cpi) {
|
|||||||
VP9_COMMON *const cm = &cpi->common;
|
VP9_COMMON *const cm = &cpi->common;
|
||||||
if (cpi->oxcf.noise_sensitivity > 0 &&
|
if (cpi->oxcf.noise_sensitivity > 0 &&
|
||||||
!cpi->denoiser.frame_buffer_initialized) {
|
!cpi->denoiser.frame_buffer_initialized) {
|
||||||
vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
|
if (vp9_denoiser_alloc(&cpi->denoiser, cm->width, cm->height,
|
||||||
cm->subsampling_x, cm->subsampling_y,
|
cm->subsampling_x, cm->subsampling_y,
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
cm->use_highbitdepth,
|
cm->use_highbitdepth,
|
||||||
#endif
|
#endif
|
||||||
VP9_ENC_BORDER_IN_PIXELS);
|
VP9_ENC_BORDER_IN_PIXELS))
|
||||||
|
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate denoiser");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -3032,14 +3037,15 @@ static void set_frame_size(VP9_COMP *cpi) {
|
|||||||
alloc_frame_mvs(cm, cm->new_fb_idx);
|
alloc_frame_mvs(cm, cm->new_fb_idx);
|
||||||
|
|
||||||
// Reset the frame pointers to the current frame size.
|
// Reset the frame pointers to the current frame size.
|
||||||
vpx_realloc_frame_buffer(get_frame_new_buffer(cm),
|
if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
|
||||||
cm->width, cm->height,
|
|
||||||
cm->subsampling_x, cm->subsampling_y,
|
cm->subsampling_x, cm->subsampling_y,
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
cm->use_highbitdepth,
|
cm->use_highbitdepth,
|
||||||
#endif
|
#endif
|
||||||
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
|
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL))
|
||||||
|
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||||
|
"Failed to allocate frame buffer");
|
||||||
|
|
||||||
alloc_util_frame_buffers(cpi);
|
alloc_util_frame_buffers(cpi);
|
||||||
init_motion_estimation(cpi);
|
init_motion_estimation(cpi);
|
||||||
@ -3889,21 +3895,15 @@ static void check_initial_width(VP9_COMP *cpi,
|
|||||||
int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
|
int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
|
||||||
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
|
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
|
||||||
int64_t end_time) {
|
int64_t end_time) {
|
||||||
VP9_COMMON *volatile const cm = &cpi->common;
|
VP9_COMMON *const cm = &cpi->common;
|
||||||
struct vpx_usec_timer timer;
|
struct vpx_usec_timer timer;
|
||||||
volatile int res = 0;
|
int res = 0;
|
||||||
const int subsampling_x = sd->subsampling_x;
|
const int subsampling_x = sd->subsampling_x;
|
||||||
const int subsampling_y = sd->subsampling_y;
|
const int subsampling_y = sd->subsampling_y;
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
|
const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (setjmp(cm->error.jmp)) {
|
|
||||||
cm->error.setjmp = 0;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
cm->error.setjmp = 1;
|
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
|
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
|
||||||
#else
|
#else
|
||||||
@ -3937,7 +3937,6 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
|
|||||||
res = -1;
|
res = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
cm->error.setjmp = 0;
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -286,6 +286,13 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
|
|||||||
set_block_thresholds(cm, rd);
|
set_block_thresholds(cm, rd);
|
||||||
set_partition_probs(cm, xd);
|
set_partition_probs(cm, xd);
|
||||||
|
|
||||||
|
if (cpi->oxcf.pass == 1) {
|
||||||
|
if (!frame_is_intra_only(cm))
|
||||||
|
vp9_build_nmv_cost_table(
|
||||||
|
x->nmvjointcost,
|
||||||
|
cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
|
||||||
|
&cm->fc->nmvc, cm->allow_high_precision_mv);
|
||||||
|
} else {
|
||||||
if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
|
if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
|
||||||
fill_token_costs(x->token_costs, cm->fc->coef_probs);
|
fill_token_costs(x->token_costs, cm->fc->coef_probs);
|
||||||
|
|
||||||
@ -301,9 +308,9 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
|
|||||||
fill_mode_costs(cpi);
|
fill_mode_costs(cpi);
|
||||||
|
|
||||||
if (!frame_is_intra_only(cm)) {
|
if (!frame_is_intra_only(cm)) {
|
||||||
vp9_build_nmv_cost_table(x->nmvjointcost,
|
vp9_build_nmv_cost_table(
|
||||||
cm->allow_high_precision_mv ? x->nmvcost_hp
|
x->nmvjointcost,
|
||||||
: x->nmvcost,
|
cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
|
||||||
&cm->fc->nmvc, cm->allow_high_precision_mv);
|
&cm->fc->nmvc, cm->allow_high_precision_mv);
|
||||||
|
|
||||||
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
|
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
|
||||||
@ -312,6 +319,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
|
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
|
||||||
// NOTE: The tables below must be of the same size.
|
// NOTE: The tables below must be of the same size.
|
||||||
|
@ -462,6 +462,7 @@ static void resize_multistep(const uint8_t *const input,
|
|||||||
int filteredlength = length;
|
int filteredlength = length;
|
||||||
if (!tmpbuf) {
|
if (!tmpbuf) {
|
||||||
tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length);
|
tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length);
|
||||||
|
if (tmpbuf == NULL) return;
|
||||||
otmp = tmpbuf;
|
otmp = tmpbuf;
|
||||||
} else {
|
} else {
|
||||||
otmp = buf;
|
otmp = buf;
|
||||||
@ -521,6 +522,7 @@ void vp9_resize_plane(const uint8_t *const input,
|
|||||||
uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
|
uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
|
||||||
(width < height ? height : width));
|
(width < height ? height : width));
|
||||||
uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2));
|
uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2));
|
||||||
|
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL) goto Error;
|
||||||
assert(width > 0);
|
assert(width > 0);
|
||||||
assert(height > 0);
|
assert(height > 0);
|
||||||
assert(width2 > 0);
|
assert(width2 > 0);
|
||||||
@ -533,6 +535,8 @@ void vp9_resize_plane(const uint8_t *const input,
|
|||||||
resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf);
|
resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf);
|
||||||
fill_arr_to_col(output + i, out_stride, height2, arrbuf + height);
|
fill_arr_to_col(output + i, out_stride, height2, arrbuf + height);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Error:
|
||||||
free(intbuf);
|
free(intbuf);
|
||||||
free(tmpbuf);
|
free(tmpbuf);
|
||||||
free(arrbuf);
|
free(arrbuf);
|
||||||
@ -755,6 +759,7 @@ static void highbd_resize_multistep(const uint16_t *const input,
|
|||||||
int filteredlength = length;
|
int filteredlength = length;
|
||||||
if (!tmpbuf) {
|
if (!tmpbuf) {
|
||||||
tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length);
|
tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length);
|
||||||
|
if (tmpbuf == NULL) return;
|
||||||
otmp = tmpbuf;
|
otmp = tmpbuf;
|
||||||
} else {
|
} else {
|
||||||
otmp = buf;
|
otmp = buf;
|
||||||
@ -817,6 +822,7 @@ void vp9_highbd_resize_plane(const uint8_t *const input,
|
|||||||
uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
|
uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
|
||||||
(width < height ? height : width));
|
(width < height ? height : width));
|
||||||
uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2));
|
uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2));
|
||||||
|
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL) goto Error;
|
||||||
for (i = 0; i < height; ++i) {
|
for (i = 0; i < height; ++i) {
|
||||||
highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
|
highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
|
||||||
intbuf + width2 * i, width2, tmpbuf, bd);
|
intbuf + width2 * i, width2, tmpbuf, bd);
|
||||||
@ -828,6 +834,8 @@ void vp9_highbd_resize_plane(const uint8_t *const input,
|
|||||||
highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
|
highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
|
||||||
arrbuf + height);
|
arrbuf + height);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Error:
|
||||||
free(intbuf);
|
free(intbuf);
|
||||||
free(tmpbuf);
|
free(tmpbuf);
|
||||||
free(arrbuf);
|
free(arrbuf);
|
||||||
|
@ -118,15 +118,20 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
|
|||||||
tl == 0) {
|
tl == 0) {
|
||||||
size_t last_coded_q_map_size;
|
size_t last_coded_q_map_size;
|
||||||
size_t consec_zero_mv_size;
|
size_t consec_zero_mv_size;
|
||||||
|
VP9_COMMON *const cm = &cpi->common;
|
||||||
lc->sb_index = 0;
|
lc->sb_index = 0;
|
||||||
lc->map = vpx_malloc(mi_rows * mi_cols * sizeof(signed char));
|
CHECK_MEM_ERROR(cm, lc->map,
|
||||||
|
vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
|
||||||
memset(lc->map, 0, mi_rows * mi_cols);
|
memset(lc->map, 0, mi_rows * mi_cols);
|
||||||
last_coded_q_map_size = mi_rows * mi_cols * sizeof(uint8_t);
|
last_coded_q_map_size = mi_rows * mi_cols *
|
||||||
lc->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
|
sizeof(*lc->last_coded_q_map);
|
||||||
|
CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
|
||||||
|
vpx_malloc(last_coded_q_map_size));
|
||||||
assert(MAXQ <= 255);
|
assert(MAXQ <= 255);
|
||||||
memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
|
memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
|
||||||
consec_zero_mv_size = mi_rows * mi_cols * sizeof(uint8_t);
|
consec_zero_mv_size = mi_rows * mi_cols * sizeof(*lc->consec_zero_mv);
|
||||||
lc->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
|
CHECK_MEM_ERROR(cm, lc->consec_zero_mv,
|
||||||
|
vpx_malloc(consec_zero_mv_size));
|
||||||
memset(lc->consec_zero_mv, 0, consec_zero_mv_size);
|
memset(lc->consec_zero_mv, 0, consec_zero_mv_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
#include "./vpx_config.h"
|
#include "./vpx_config.h"
|
||||||
#include "vpx/vpx_encoder.h"
|
#include "vpx/vpx_encoder.h"
|
||||||
#include "vpx_ports/vpx_once.h"
|
#include "vpx_ports/vpx_once.h"
|
||||||
|
#include "vpx_ports/system_state.h"
|
||||||
#include "vpx/internal/vpx_codec_internal.h"
|
#include "vpx/internal/vpx_codec_internal.h"
|
||||||
#include "./vpx_version.h"
|
#include "./vpx_version.h"
|
||||||
#include "vp9/encoder/vp9_encoder.h"
|
#include "vp9/encoder/vp9_encoder.h"
|
||||||
@ -967,18 +968,19 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
const vpx_image_t *img,
|
const vpx_image_t *img,
|
||||||
vpx_codec_pts_t pts,
|
vpx_codec_pts_t pts,
|
||||||
unsigned long duration,
|
unsigned long duration,
|
||||||
vpx_enc_frame_flags_t flags,
|
vpx_enc_frame_flags_t enc_flags,
|
||||||
unsigned long deadline) {
|
unsigned long deadline) {
|
||||||
vpx_codec_err_t res = VPX_CODEC_OK;
|
volatile vpx_codec_err_t res = VPX_CODEC_OK;
|
||||||
|
volatile vpx_enc_frame_flags_t flags = enc_flags;
|
||||||
VP9_COMP *const cpi = ctx->cpi;
|
VP9_COMP *const cpi = ctx->cpi;
|
||||||
const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
|
const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
|
||||||
size_t data_sz;
|
size_t data_sz;
|
||||||
|
|
||||||
|
if (cpi == NULL) return VPX_CODEC_INVALID_PARAM;
|
||||||
|
|
||||||
if (img != NULL) {
|
if (img != NULL) {
|
||||||
res = validate_img(ctx, img);
|
res = validate_img(ctx, img);
|
||||||
// TODO(jzern) the checks related to cpi's validity should be treated as a
|
if (res == VPX_CODEC_OK) {
|
||||||
// failure condition, encoder setup is done fully in init() currently.
|
|
||||||
if (res == VPX_CODEC_OK && cpi != NULL) {
|
|
||||||
// There's no codec control for multiple alt-refs so check the encoder
|
// There's no codec control for multiple alt-refs so check the encoder
|
||||||
// instance for its status to determine the compressed data size.
|
// instance for its status to determine the compressed data size.
|
||||||
data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
|
data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
|
||||||
@ -1006,6 +1008,14 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
return VPX_CODEC_INVALID_PARAM;
|
return VPX_CODEC_INVALID_PARAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (setjmp(cpi->common.error.jmp)) {
|
||||||
|
cpi->common.error.setjmp = 0;
|
||||||
|
res = update_error_state(ctx, &cpi->common.error);
|
||||||
|
vpx_clear_system_state();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
cpi->common.error.setjmp = 1;
|
||||||
|
|
||||||
vp9_apply_encoding_flags(cpi, flags);
|
vp9_apply_encoding_flags(cpi, flags);
|
||||||
|
|
||||||
// Handle fixed keyframe intervals
|
// Handle fixed keyframe intervals
|
||||||
@ -1017,8 +1027,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the encoder instance on the first frame.
|
if (res == VPX_CODEC_OK) {
|
||||||
if (res == VPX_CODEC_OK && cpi != NULL) {
|
|
||||||
unsigned int lib_flags = 0;
|
unsigned int lib_flags = 0;
|
||||||
YV12_BUFFER_CONFIG sd;
|
YV12_BUFFER_CONFIG sd;
|
||||||
int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
|
int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
|
||||||
@ -1057,7 +1066,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
* the buffer size anyway.
|
* the buffer size anyway.
|
||||||
*/
|
*/
|
||||||
if (cx_data_sz < ctx->cx_data_sz / 2) {
|
if (cx_data_sz < ctx->cx_data_sz / 2) {
|
||||||
ctx->base.err_detail = "Compressed data buffer too small";
|
vpx_internal_error(&cpi->common.error, VPX_CODEC_ERROR,
|
||||||
|
"Compressed data buffer too small");
|
||||||
return VPX_CODEC_ERROR;
|
return VPX_CODEC_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1175,6 +1185,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cpi->common.error.setjmp = 0;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,37 +16,28 @@
|
|||||||
|
|
||||||
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
|
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
|
||||||
; works on 16 iterations at a time.
|
; works on 16 iterations at a time.
|
||||||
; TODO(fgalligan): See about removing the count code as this function is only
|
|
||||||
; called with a count of 1.
|
|
||||||
;
|
;
|
||||||
; void vpx_lpf_horizontal_4_neon(uint8_t *s,
|
; void vpx_lpf_horizontal_4_neon(uint8_t *s,
|
||||||
; int p /* pitch */,
|
; int p /* pitch */,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
; const uint8_t *limit,
|
; const uint8_t *limit,
|
||||||
; const uint8_t *thresh,
|
; const uint8_t *thresh)
|
||||||
; int count)
|
|
||||||
;
|
;
|
||||||
; r0 uint8_t *s,
|
; r0 uint8_t *s,
|
||||||
; r1 int p, /* pitch */
|
; r1 int p, /* pitch */
|
||||||
; r2 const uint8_t *blimit,
|
; r2 const uint8_t *blimit,
|
||||||
; r3 const uint8_t *limit,
|
; r3 const uint8_t *limit,
|
||||||
; sp const uint8_t *thresh,
|
; sp const uint8_t *thresh,
|
||||||
; sp+4 int count
|
|
||||||
|vpx_lpf_horizontal_4_neon| PROC
|
|vpx_lpf_horizontal_4_neon| PROC
|
||||||
push {lr}
|
push {lr}
|
||||||
|
|
||||||
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
||||||
ldr r12, [sp, #8] ; load count
|
|
||||||
ldr r2, [sp, #4] ; load thresh
|
ldr r2, [sp, #4] ; load thresh
|
||||||
add r1, r1, r1 ; double pitch
|
add r1, r1, r1 ; double pitch
|
||||||
|
|
||||||
cmp r12, #0
|
|
||||||
beq end_vpx_lf_h_edge
|
|
||||||
|
|
||||||
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
||||||
vld1.8 {d2[]}, [r2] ; duplicate *thresh
|
vld1.8 {d2[]}, [r2] ; duplicate *thresh
|
||||||
|
|
||||||
count_lf_h_loop
|
|
||||||
sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
||||||
add r3, r2, r1, lsr #1 ; set to 3 lines down
|
add r3, r2, r1, lsr #1 ; set to 3 lines down
|
||||||
|
|
||||||
@ -69,47 +60,34 @@ count_lf_h_loop
|
|||||||
vst1.u8 {d6}, [r2@64], r1 ; store oq0
|
vst1.u8 {d6}, [r2@64], r1 ; store oq0
|
||||||
vst1.u8 {d7}, [r3@64], r1 ; store oq1
|
vst1.u8 {d7}, [r3@64], r1 ; store oq1
|
||||||
|
|
||||||
add r0, r0, #8
|
|
||||||
subs r12, r12, #1
|
|
||||||
bne count_lf_h_loop
|
|
||||||
|
|
||||||
end_vpx_lf_h_edge
|
|
||||||
pop {pc}
|
pop {pc}
|
||||||
ENDP ; |vpx_lpf_horizontal_4_neon|
|
ENDP ; |vpx_lpf_horizontal_4_neon|
|
||||||
|
|
||||||
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
|
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
|
||||||
; works on 16 iterations at a time.
|
; works on 16 iterations at a time.
|
||||||
; TODO(fgalligan): See about removing the count code as this function is only
|
|
||||||
; called with a count of 1.
|
|
||||||
;
|
;
|
||||||
; void vpx_lpf_vertical_4_neon(uint8_t *s,
|
; void vpx_lpf_vertical_4_neon(uint8_t *s,
|
||||||
; int p /* pitch */,
|
; int p /* pitch */,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
; const uint8_t *limit,
|
; const uint8_t *limit,
|
||||||
; const uint8_t *thresh,
|
; const uint8_t *thresh)
|
||||||
; int count)
|
|
||||||
;
|
;
|
||||||
; r0 uint8_t *s,
|
; r0 uint8_t *s,
|
||||||
; r1 int p, /* pitch */
|
; r1 int p, /* pitch */
|
||||||
; r2 const uint8_t *blimit,
|
; r2 const uint8_t *blimit,
|
||||||
; r3 const uint8_t *limit,
|
; r3 const uint8_t *limit,
|
||||||
; sp const uint8_t *thresh,
|
; sp const uint8_t *thresh,
|
||||||
; sp+4 int count
|
|
||||||
|vpx_lpf_vertical_4_neon| PROC
|
|vpx_lpf_vertical_4_neon| PROC
|
||||||
push {lr}
|
push {lr}
|
||||||
|
|
||||||
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
||||||
ldr r12, [sp, #8] ; load count
|
|
||||||
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
||||||
|
|
||||||
ldr r3, [sp, #4] ; load thresh
|
ldr r3, [sp, #4] ; load thresh
|
||||||
sub r2, r0, #4 ; move s pointer down by 4 columns
|
sub r2, r0, #4 ; move s pointer down by 4 columns
|
||||||
cmp r12, #0
|
|
||||||
beq end_vpx_lf_v_edge
|
|
||||||
|
|
||||||
vld1.8 {d2[]}, [r3] ; duplicate *thresh
|
vld1.8 {d2[]}, [r3] ; duplicate *thresh
|
||||||
|
|
||||||
count_lf_v_loop
|
|
||||||
vld1.u8 {d3}, [r2], r1 ; load s data
|
vld1.u8 {d3}, [r2], r1 ; load s data
|
||||||
vld1.u8 {d4}, [r2], r1
|
vld1.u8 {d4}, [r2], r1
|
||||||
vld1.u8 {d5}, [r2], r1
|
vld1.u8 {d5}, [r2], r1
|
||||||
@ -149,12 +127,6 @@ count_lf_v_loop
|
|||||||
vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1
|
vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1
|
||||||
vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0]
|
vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0]
|
||||||
|
|
||||||
add r0, r0, r1, lsl #3 ; s += pitch * 8
|
|
||||||
subs r12, r12, #1
|
|
||||||
subne r2, r0, #4 ; move s pointer down by 4 columns
|
|
||||||
bne count_lf_v_loop
|
|
||||||
|
|
||||||
end_vpx_lf_v_edge
|
|
||||||
pop {pc}
|
pop {pc}
|
||||||
ENDP ; |vpx_lpf_vertical_4_neon|
|
ENDP ; |vpx_lpf_vertical_4_neon|
|
||||||
|
|
||||||
|
@ -115,22 +115,18 @@ void vpx_lpf_horizontal_4_neon(
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
int i;
|
int i;
|
||||||
uint8_t *s, *psrc;
|
uint8_t *s, *psrc;
|
||||||
uint8x8_t dblimit, dlimit, dthresh;
|
uint8x8_t dblimit, dlimit, dthresh;
|
||||||
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
|
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
|
||||||
|
|
||||||
if (count == 0) // end_vpx_lf_h_edge
|
|
||||||
return;
|
|
||||||
|
|
||||||
dblimit = vld1_u8(blimit);
|
dblimit = vld1_u8(blimit);
|
||||||
dlimit = vld1_u8(limit);
|
dlimit = vld1_u8(limit);
|
||||||
dthresh = vld1_u8(thresh);
|
dthresh = vld1_u8(thresh);
|
||||||
|
|
||||||
psrc = src - (pitch << 2);
|
psrc = src - (pitch << 2);
|
||||||
for (i = 0; i < count; i++) {
|
for (i = 0; i < 1; i++) {
|
||||||
s = psrc + i * 8;
|
s = psrc + i * 8;
|
||||||
|
|
||||||
d3u8 = vld1_u8(s);
|
d3u8 = vld1_u8(s);
|
||||||
@ -170,8 +166,7 @@ void vpx_lpf_vertical_4_neon(
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
int i, pitch8;
|
int i, pitch8;
|
||||||
uint8_t *s;
|
uint8_t *s;
|
||||||
uint8x8_t dblimit, dlimit, dthresh;
|
uint8x8_t dblimit, dlimit, dthresh;
|
||||||
@ -181,15 +176,12 @@ void vpx_lpf_vertical_4_neon(
|
|||||||
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
|
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
|
||||||
uint8x8x4_t d4Result;
|
uint8x8x4_t d4Result;
|
||||||
|
|
||||||
if (count == 0) // end_vpx_lf_h_edge
|
|
||||||
return;
|
|
||||||
|
|
||||||
dblimit = vld1_u8(blimit);
|
dblimit = vld1_u8(blimit);
|
||||||
dlimit = vld1_u8(limit);
|
dlimit = vld1_u8(limit);
|
||||||
dthresh = vld1_u8(thresh);
|
dthresh = vld1_u8(thresh);
|
||||||
|
|
||||||
pitch8 = pitch * 8;
|
pitch8 = pitch * 8;
|
||||||
for (i = 0; i < count; i++, src += pitch8) {
|
for (i = 0; i < 1; i++, src += pitch8) {
|
||||||
s = src - (i + 1) * 4;
|
s = src - (i + 1) * 4;
|
||||||
|
|
||||||
d3u8 = vld1_u8(s);
|
d3u8 = vld1_u8(s);
|
||||||
|
@ -16,35 +16,26 @@
|
|||||||
|
|
||||||
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
|
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
|
||||||
; works on 16 iterations at a time.
|
; works on 16 iterations at a time.
|
||||||
; TODO(fgalligan): See about removing the count code as this function is only
|
|
||||||
; called with a count of 1.
|
|
||||||
;
|
;
|
||||||
; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
|
; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
; const uint8_t *limit,
|
; const uint8_t *limit,
|
||||||
; const uint8_t *thresh,
|
; const uint8_t *thresh)
|
||||||
; int count)
|
|
||||||
; r0 uint8_t *s,
|
; r0 uint8_t *s,
|
||||||
; r1 int p, /* pitch */
|
; r1 int p, /* pitch */
|
||||||
; r2 const uint8_t *blimit,
|
; r2 const uint8_t *blimit,
|
||||||
; r3 const uint8_t *limit,
|
; r3 const uint8_t *limit,
|
||||||
; sp const uint8_t *thresh,
|
; sp const uint8_t *thresh,
|
||||||
; sp+4 int count
|
|
||||||
|vpx_lpf_horizontal_8_neon| PROC
|
|vpx_lpf_horizontal_8_neon| PROC
|
||||||
push {r4-r5, lr}
|
push {r4-r5, lr}
|
||||||
|
|
||||||
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
||||||
ldr r12, [sp, #16] ; load count
|
|
||||||
ldr r2, [sp, #12] ; load thresh
|
ldr r2, [sp, #12] ; load thresh
|
||||||
add r1, r1, r1 ; double pitch
|
add r1, r1, r1 ; double pitch
|
||||||
|
|
||||||
cmp r12, #0
|
|
||||||
beq end_vpx_mblf_h_edge
|
|
||||||
|
|
||||||
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
||||||
vld1.8 {d2[]}, [r2] ; duplicate *thresh
|
vld1.8 {d2[]}, [r2] ; duplicate *thresh
|
||||||
|
|
||||||
count_mblf_h_loop
|
|
||||||
sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
||||||
add r2, r3, r1, lsr #1 ; set to 3 lines down
|
add r2, r3, r1, lsr #1 ; set to 3 lines down
|
||||||
|
|
||||||
@ -69,11 +60,6 @@ count_mblf_h_loop
|
|||||||
vst1.u8 {d4}, [r2@64], r1 ; store oq1
|
vst1.u8 {d4}, [r2@64], r1 ; store oq1
|
||||||
vst1.u8 {d5}, [r3@64], r1 ; store oq2
|
vst1.u8 {d5}, [r3@64], r1 ; store oq2
|
||||||
|
|
||||||
add r0, r0, #8
|
|
||||||
subs r12, r12, #1
|
|
||||||
bne count_mblf_h_loop
|
|
||||||
|
|
||||||
end_vpx_mblf_h_edge
|
|
||||||
pop {r4-r5, pc}
|
pop {r4-r5, pc}
|
||||||
|
|
||||||
ENDP ; |vpx_lpf_horizontal_8_neon|
|
ENDP ; |vpx_lpf_horizontal_8_neon|
|
||||||
@ -82,30 +68,24 @@ end_vpx_mblf_h_edge
|
|||||||
; int pitch,
|
; int pitch,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
; const uint8_t *limit,
|
; const uint8_t *limit,
|
||||||
; const uint8_t *thresh,
|
; const uint8_t *thresh)
|
||||||
; int count)
|
|
||||||
;
|
;
|
||||||
; r0 uint8_t *s,
|
; r0 uint8_t *s,
|
||||||
; r1 int pitch,
|
; r1 int pitch,
|
||||||
; r2 const uint8_t *blimit,
|
; r2 const uint8_t *blimit,
|
||||||
; r3 const uint8_t *limit,
|
; r3 const uint8_t *limit,
|
||||||
; sp const uint8_t *thresh,
|
; sp const uint8_t *thresh,
|
||||||
; sp+4 int count
|
|
||||||
|vpx_lpf_vertical_8_neon| PROC
|
|vpx_lpf_vertical_8_neon| PROC
|
||||||
push {r4-r5, lr}
|
push {r4-r5, lr}
|
||||||
|
|
||||||
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
||||||
ldr r12, [sp, #16] ; load count
|
|
||||||
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
||||||
|
|
||||||
ldr r3, [sp, #12] ; load thresh
|
ldr r3, [sp, #12] ; load thresh
|
||||||
sub r2, r0, #4 ; move s pointer down by 4 columns
|
sub r2, r0, #4 ; move s pointer down by 4 columns
|
||||||
cmp r12, #0
|
|
||||||
beq end_vpx_mblf_v_edge
|
|
||||||
|
|
||||||
vld1.8 {d2[]}, [r3] ; duplicate *thresh
|
vld1.8 {d2[]}, [r3] ; duplicate *thresh
|
||||||
|
|
||||||
count_mblf_v_loop
|
|
||||||
vld1.u8 {d3}, [r2], r1 ; load s data
|
vld1.u8 {d3}, [r2], r1 ; load s data
|
||||||
vld1.u8 {d4}, [r2], r1
|
vld1.u8 {d4}, [r2], r1
|
||||||
vld1.u8 {d5}, [r2], r1
|
vld1.u8 {d5}, [r2], r1
|
||||||
@ -156,12 +136,6 @@ count_mblf_v_loop
|
|||||||
vst2.8 {d4[6], d5[6]}, [r3], r1
|
vst2.8 {d4[6], d5[6]}, [r3], r1
|
||||||
vst2.8 {d4[7], d5[7]}, [r3]
|
vst2.8 {d4[7], d5[7]}, [r3]
|
||||||
|
|
||||||
add r0, r0, r1, lsl #3 ; s += pitch * 8
|
|
||||||
subs r12, r12, #1
|
|
||||||
subne r2, r0, #4 ; move s pointer down by 4 columns
|
|
||||||
bne count_mblf_v_loop
|
|
||||||
|
|
||||||
end_vpx_mblf_v_edge
|
|
||||||
pop {r4-r5, pc}
|
pop {r4-r5, pc}
|
||||||
ENDP ; |vpx_lpf_vertical_8_neon|
|
ENDP ; |vpx_lpf_vertical_8_neon|
|
||||||
|
|
||||||
|
@ -268,23 +268,19 @@ void vpx_lpf_horizontal_8_neon(
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
int i;
|
int i;
|
||||||
uint8_t *s, *psrc;
|
uint8_t *s, *psrc;
|
||||||
uint8x8_t dblimit, dlimit, dthresh;
|
uint8x8_t dblimit, dlimit, dthresh;
|
||||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
|
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
|
||||||
uint8x8_t d16u8, d17u8, d18u8;
|
uint8x8_t d16u8, d17u8, d18u8;
|
||||||
|
|
||||||
if (count == 0) // end_vpx_mblf_h_edge
|
|
||||||
return;
|
|
||||||
|
|
||||||
dblimit = vld1_u8(blimit);
|
dblimit = vld1_u8(blimit);
|
||||||
dlimit = vld1_u8(limit);
|
dlimit = vld1_u8(limit);
|
||||||
dthresh = vld1_u8(thresh);
|
dthresh = vld1_u8(thresh);
|
||||||
|
|
||||||
psrc = src - (pitch << 2);
|
psrc = src - (pitch << 2);
|
||||||
for (i = 0; i < count; i++) {
|
for (i = 0; i < 1; i++) {
|
||||||
s = psrc + i * 8;
|
s = psrc + i * 8;
|
||||||
|
|
||||||
d3u8 = vld1_u8(s);
|
d3u8 = vld1_u8(s);
|
||||||
@ -328,8 +324,7 @@ void vpx_lpf_vertical_8_neon(
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
int i;
|
int i;
|
||||||
uint8_t *s;
|
uint8_t *s;
|
||||||
uint8x8_t dblimit, dlimit, dthresh;
|
uint8x8_t dblimit, dlimit, dthresh;
|
||||||
@ -341,14 +336,11 @@ void vpx_lpf_vertical_8_neon(
|
|||||||
uint8x8x4_t d4Result;
|
uint8x8x4_t d4Result;
|
||||||
uint8x8x2_t d2Result;
|
uint8x8x2_t d2Result;
|
||||||
|
|
||||||
if (count == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
dblimit = vld1_u8(blimit);
|
dblimit = vld1_u8(blimit);
|
||||||
dlimit = vld1_u8(limit);
|
dlimit = vld1_u8(limit);
|
||||||
dthresh = vld1_u8(thresh);
|
dthresh = vld1_u8(thresh);
|
||||||
|
|
||||||
for (i = 0; i < count; i++) {
|
for (i = 0; i < 1; i++) {
|
||||||
s = src + (i * (pitch << 3)) - 4;
|
s = src + (i * (pitch << 3)) - 4;
|
||||||
|
|
||||||
d3u8 = vld1_u8(s);
|
d3u8 = vld1_u8(s);
|
||||||
|
@ -8,27 +8,28 @@
|
|||||||
; be found in the AUTHORS file in the root of the source tree.
|
; be found in the AUTHORS file in the root of the source tree.
|
||||||
;
|
;
|
||||||
|
|
||||||
EXPORT |vpx_lpf_horizontal_16_neon|
|
EXPORT |vpx_lpf_horizontal_edge_8_neon|
|
||||||
|
EXPORT |vpx_lpf_horizontal_edge_16_neon|
|
||||||
EXPORT |vpx_lpf_vertical_16_neon|
|
EXPORT |vpx_lpf_vertical_16_neon|
|
||||||
ARM
|
ARM
|
||||||
|
|
||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
|
|
||||||
; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
|
; void mb_lpf_horizontal_edge(uint8_t *s, int p,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
; const uint8_t *limit,
|
; const uint8_t *limit,
|
||||||
; const uint8_t *thresh
|
; const uint8_t *thresh,
|
||||||
; int count)
|
; int count)
|
||||||
; r0 uint8_t *s,
|
; r0 uint8_t *s,
|
||||||
; r1 int p, /* pitch */
|
; r1 int p, /* pitch */
|
||||||
; r2 const uint8_t *blimit,
|
; r2 const uint8_t *blimit,
|
||||||
; r3 const uint8_t *limit,
|
; r3 const uint8_t *limit,
|
||||||
; sp const uint8_t *thresh,
|
; sp const uint8_t *thresh,
|
||||||
|vpx_lpf_horizontal_16_neon| PROC
|
; r12 int count
|
||||||
|
|mb_lpf_horizontal_edge| PROC
|
||||||
push {r4-r8, lr}
|
push {r4-r8, lr}
|
||||||
vpush {d8-d15}
|
vpush {d8-d15}
|
||||||
ldr r4, [sp, #88] ; load thresh
|
ldr r4, [sp, #88] ; load thresh
|
||||||
ldr r12, [sp, #92] ; load count
|
|
||||||
|
|
||||||
h_count
|
h_count
|
||||||
vld1.8 {d16[]}, [r2] ; load *blimit
|
vld1.8 {d16[]}, [r2] ; load *blimit
|
||||||
@ -115,7 +116,35 @@ h_next
|
|||||||
vpop {d8-d15}
|
vpop {d8-d15}
|
||||||
pop {r4-r8, pc}
|
pop {r4-r8, pc}
|
||||||
|
|
||||||
ENDP ; |vpx_lpf_horizontal_16_neon|
|
ENDP ; |mb_lpf_horizontal_edge|
|
||||||
|
|
||||||
|
; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
|
||||||
|
; const uint8_t *blimit,
|
||||||
|
; const uint8_t *limit,
|
||||||
|
; const uint8_t *thresh)
|
||||||
|
; r0 uint8_t *s,
|
||||||
|
; r1 int pitch,
|
||||||
|
; r2 const uint8_t *blimit,
|
||||||
|
; r3 const uint8_t *limit,
|
||||||
|
; sp const uint8_t *thresh
|
||||||
|
|vpx_lpf_horizontal_edge_8_neon| PROC
|
||||||
|
mov r12, #1
|
||||||
|
b mb_lpf_horizontal_edge
|
||||||
|
ENDP ; |vpx_lpf_horizontal_edge_8_neon|
|
||||||
|
|
||||||
|
; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
|
||||||
|
; const uint8_t *blimit,
|
||||||
|
; const uint8_t *limit,
|
||||||
|
; const uint8_t *thresh)
|
||||||
|
; r0 uint8_t *s,
|
||||||
|
; r1 int pitch,
|
||||||
|
; r2 const uint8_t *blimit,
|
||||||
|
; r3 const uint8_t *limit,
|
||||||
|
; sp const uint8_t *thresh
|
||||||
|
|vpx_lpf_horizontal_edge_16_neon| PROC
|
||||||
|
mov r12, #2
|
||||||
|
b mb_lpf_horizontal_edge
|
||||||
|
ENDP ; |vpx_lpf_horizontal_edge_16_neon|
|
||||||
|
|
||||||
; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
|
; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
|
@ -21,8 +21,8 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
|
|||||||
const uint8_t *blimit1,
|
const uint8_t *blimit1,
|
||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_NEON_ASM
|
#if HAVE_NEON_ASM
|
||||||
@ -33,8 +33,8 @@ void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
|
|||||||
const uint8_t *blimit1,
|
const uint8_t *blimit1,
|
||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
|
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
|
||||||
@ -44,8 +44,8 @@ void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
|
|||||||
const uint8_t *blimit1,
|
const uint8_t *blimit1,
|
||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p,
|
void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p,
|
||||||
|
@ -119,12 +119,12 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1,
|
|||||||
|
|
||||||
void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
|
void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
|
||||||
const uint8_t *blimit, const uint8_t *limit,
|
const uint8_t *blimit, const uint8_t *limit,
|
||||||
const uint8_t *thresh, int count) {
|
const uint8_t *thresh) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
||||||
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
||||||
const int8_t mask = filter_mask(*limit, *blimit,
|
const int8_t mask = filter_mask(*limit, *blimit,
|
||||||
@ -138,18 +138,17 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
|
|||||||
const uint8_t *limit0, const uint8_t *thresh0,
|
const uint8_t *limit0, const uint8_t *thresh0,
|
||||||
const uint8_t *blimit1, const uint8_t *limit1,
|
const uint8_t *blimit1, const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
||||||
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
||||||
const int8_t mask = filter_mask(*limit, *blimit,
|
const int8_t mask = filter_mask(*limit, *blimit,
|
||||||
@ -163,9 +162,8 @@ void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
|
|||||||
const uint8_t *limit0, const uint8_t *thresh0,
|
const uint8_t *limit0, const uint8_t *thresh0,
|
||||||
const uint8_t *blimit1, const uint8_t *limit1,
|
const uint8_t *blimit1, const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
|
vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
|
vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
|
||||||
thresh1, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
|
static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
|
||||||
@ -190,13 +188,12 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
|
void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
||||||
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
||||||
|
|
||||||
@ -213,16 +210,15 @@ void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
|
|||||||
const uint8_t *limit0, const uint8_t *thresh0,
|
const uint8_t *limit0, const uint8_t *thresh0,
|
||||||
const uint8_t *blimit1, const uint8_t *limit1,
|
const uint8_t *blimit1, const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
||||||
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
||||||
const int8_t mask = filter_mask(*limit, *blimit,
|
const int8_t mask = filter_mask(*limit, *blimit,
|
||||||
@ -238,9 +234,8 @@ void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
|
|||||||
const uint8_t *limit0, const uint8_t *thresh0,
|
const uint8_t *limit0, const uint8_t *thresh0,
|
||||||
const uint8_t *blimit1, const uint8_t *limit1,
|
const uint8_t *blimit1, const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
|
vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
|
vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
|
||||||
thresh1, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void filter16(int8_t mask, uint8_t thresh,
|
static INLINE void filter16(int8_t mask, uint8_t thresh,
|
||||||
@ -294,9 +289,9 @@ static INLINE void filter16(int8_t mask, uint8_t thresh,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit,
|
||||||
int count) {
|
const uint8_t *thresh, int count) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
@ -320,6 +315,16 @@ void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
|
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
|
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
|
||||||
|
}
|
||||||
|
|
||||||
static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
|
static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
@ -450,12 +455,12 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1,
|
|||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
|
void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
|
||||||
const uint8_t *blimit, const uint8_t *limit,
|
const uint8_t *blimit, const uint8_t *limit,
|
||||||
const uint8_t *thresh, int count, int bd) {
|
const uint8_t *thresh, int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint16_t p3 = s[-4 * p];
|
const uint16_t p3 = s[-4 * p];
|
||||||
const uint16_t p2 = s[-3 * p];
|
const uint16_t p2 = s[-3 * p];
|
||||||
const uint16_t p1 = s[-2 * p];
|
const uint16_t p1 = s[-2 * p];
|
||||||
@ -479,18 +484,18 @@ void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
|
|||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1,
|
const uint8_t *thresh1,
|
||||||
int bd) {
|
int bd) {
|
||||||
vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
|
vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd);
|
||||||
vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
|
vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
|
void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
int count, int bd) {
|
int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
||||||
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
||||||
const int8_t mask = highbd_filter_mask(*limit, *blimit,
|
const int8_t mask = highbd_filter_mask(*limit, *blimit,
|
||||||
@ -508,9 +513,9 @@ void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
|
|||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1,
|
const uint8_t *thresh1,
|
||||||
int bd) {
|
int bd) {
|
||||||
vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
|
vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd);
|
||||||
vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
|
vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
|
||||||
thresh1, 1, bd);
|
thresh1, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
|
static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
|
||||||
@ -536,12 +541,12 @@ static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
|
|||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
|
void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
int count, int bd) {
|
int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
||||||
const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
||||||
|
|
||||||
@ -564,16 +569,16 @@ void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
|
|||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1,
|
const uint8_t *thresh1,
|
||||||
int bd) {
|
int bd) {
|
||||||
vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
|
vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd);
|
||||||
vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
|
vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
|
void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
int count, int bd) {
|
int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
||||||
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
||||||
const int8_t mask = highbd_filter_mask(*limit, *blimit,
|
const int8_t mask = highbd_filter_mask(*limit, *blimit,
|
||||||
@ -596,9 +601,9 @@ void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
|
|||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1,
|
const uint8_t *thresh1,
|
||||||
int bd) {
|
int bd) {
|
||||||
vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
|
vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd);
|
||||||
vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
|
vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
|
||||||
thresh1, 1, bd);
|
thresh1, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void highbd_filter16(int8_t mask, uint8_t thresh,
|
static INLINE void highbd_filter16(int8_t mask, uint8_t thresh,
|
||||||
@ -664,8 +669,10 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
|
static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *blimit,
|
||||||
|
const uint8_t *limit,
|
||||||
|
const uint8_t *thresh,
|
||||||
int count, int bd) {
|
int count, int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -698,6 +705,20 @@ void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p,
|
||||||
|
const uint8_t *blimit,
|
||||||
|
const uint8_t *limit,
|
||||||
|
const uint8_t *thresh, int bd) {
|
||||||
|
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
|
||||||
|
const uint8_t *blimit,
|
||||||
|
const uint8_t *limit,
|
||||||
|
const uint8_t *thresh, int bd) {
|
||||||
|
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd);
|
||||||
|
}
|
||||||
|
|
||||||
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
|
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
|
@ -423,7 +423,7 @@ void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
|
static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr,
|
const uint8_t *thresh_ptr,
|
||||||
@ -648,6 +648,20 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch,
|
||||||
|
const uint8_t *b_limit_ptr,
|
||||||
|
const uint8_t *limit_ptr,
|
||||||
|
const uint8_t *thresh_ptr) {
|
||||||
|
mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch,
|
||||||
|
const uint8_t *b_limit_ptr,
|
||||||
|
const uint8_t *limit_ptr,
|
||||||
|
const uint8_t *thresh_ptr) {
|
||||||
|
mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2);
|
||||||
|
}
|
||||||
|
|
||||||
static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
|
static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
|
||||||
uint8_t *output, int32_t out_pitch) {
|
uint8_t *output, int32_t out_pitch) {
|
||||||
v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
|
v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
|
||||||
|
@ -13,14 +13,11 @@
|
|||||||
void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
|
void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr,
|
const uint8_t *thresh_ptr) {
|
||||||
int32_t count) {
|
|
||||||
uint64_t p1_d, p0_d, q0_d, q1_d;
|
uint64_t p1_d, p0_d, q0_d, q1_d;
|
||||||
v16u8 mask, hev, flat, thresh, b_limit, limit;
|
v16u8 mask, hev, flat, thresh, b_limit, limit;
|
||||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
|
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
|
||||||
|
|
||||||
(void)count;
|
|
||||||
|
|
||||||
/* load vector elements */
|
/* load vector elements */
|
||||||
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
||||||
|
|
||||||
@ -74,14 +71,11 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
|
|||||||
void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
|
void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr,
|
const uint8_t *thresh_ptr) {
|
||||||
int32_t count) {
|
|
||||||
v16u8 mask, hev, flat, limit, thresh, b_limit;
|
v16u8 mask, hev, flat, limit, thresh, b_limit;
|
||||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||||
v8i16 vec0, vec1, vec2, vec3;
|
v8i16 vec0, vec1, vec2, vec3;
|
||||||
|
|
||||||
(void)count;
|
|
||||||
|
|
||||||
LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
||||||
|
|
||||||
thresh = (v16u8)__msa_fill_b(*thresh_ptr);
|
thresh = (v16u8)__msa_fill_b(*thresh_ptr);
|
||||||
|
@ -13,8 +13,7 @@
|
|||||||
void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
|
void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr,
|
const uint8_t *thresh_ptr) {
|
||||||
int32_t count) {
|
|
||||||
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
|
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
|
||||||
v16u8 mask, hev, flat, thresh, b_limit, limit;
|
v16u8 mask, hev, flat, thresh, b_limit, limit;
|
||||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||||
@ -23,8 +22,6 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
|
|||||||
v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
|
v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
|
||||||
v16i8 zero = { 0 };
|
v16i8 zero = { 0 };
|
||||||
|
|
||||||
(void)count;
|
|
||||||
|
|
||||||
/* load vector elements */
|
/* load vector elements */
|
||||||
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
||||||
|
|
||||||
@ -161,8 +158,7 @@ void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
|
|||||||
void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
|
void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr,
|
const uint8_t *thresh_ptr) {
|
||||||
int32_t count) {
|
|
||||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||||
v16u8 p1_out, p0_out, q0_out, q1_out;
|
v16u8 p1_out, p0_out, q0_out, q1_out;
|
||||||
v16u8 flat, mask, hev, thresh, b_limit, limit;
|
v16u8 flat, mask, hev, thresh, b_limit, limit;
|
||||||
@ -171,8 +167,6 @@ void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
|
|||||||
v16u8 zero = { 0 };
|
v16u8 zero = { 0 };
|
||||||
v8i16 vec0, vec1, vec2, vec3, vec4;
|
v8i16 vec0, vec1, vec2, vec3, vec4;
|
||||||
|
|
||||||
(void)count;
|
|
||||||
|
|
||||||
/* load vector elements */
|
/* load vector elements */
|
||||||
LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
||||||
|
|
||||||
|
@ -23,8 +23,7 @@ void vpx_lpf_horizontal_4_dspr2(unsigned char *s,
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
uint8_t i;
|
uint8_t i;
|
||||||
uint32_t mask;
|
uint32_t mask;
|
||||||
uint32_t hev;
|
uint32_t hev;
|
||||||
@ -117,8 +116,7 @@ void vpx_lpf_vertical_4_dspr2(unsigned char *s,
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
uint8_t i;
|
uint8_t i;
|
||||||
uint32_t mask, hev;
|
uint32_t mask, hev;
|
||||||
uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
|
uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
|
||||||
@ -313,8 +311,8 @@ void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
|
|||||||
const uint8_t *blimit1,
|
const uint8_t *blimit1,
|
||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
|
void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
|
||||||
@ -324,8 +322,8 @@ void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
|
|||||||
const uint8_t *blimit1,
|
const uint8_t *blimit1,
|
||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
|
void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
|
||||||
@ -335,8 +333,8 @@ void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
|
|||||||
const uint8_t *blimit1,
|
const uint8_t *blimit1,
|
||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
|
void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
|
||||||
@ -346,9 +344,8 @@ void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
|
|||||||
const uint8_t *blimit1,
|
const uint8_t *blimit1,
|
||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
|
vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
|
||||||
1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
|
void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
|
||||||
|
@ -23,8 +23,7 @@ void vpx_lpf_horizontal_8_dspr2(unsigned char *s,
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
uint32_t mask;
|
uint32_t mask;
|
||||||
uint32_t hev, flat;
|
uint32_t hev, flat;
|
||||||
uint8_t i;
|
uint8_t i;
|
||||||
@ -322,8 +321,7 @@ void vpx_lpf_vertical_8_dspr2(unsigned char *s,
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
uint8_t i;
|
uint8_t i;
|
||||||
uint32_t mask, hev, flat;
|
uint32_t mask, hev, flat;
|
||||||
uint8_t *s1, *s2, *s3, *s4;
|
uint8_t *s1, *s2, *s3, *s4;
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
#include "vpx_mem/vpx_mem.h"
|
#include "vpx_mem/vpx_mem.h"
|
||||||
|
|
||||||
#if HAVE_DSPR2
|
#if HAVE_DSPR2
|
||||||
void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
|
static void mb_lpf_horizontal_edge(unsigned char *s,
|
||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
@ -791,4 +791,18 @@ void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
|
|||||||
s = s + 4;
|
s = s + 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch,
|
||||||
|
const uint8_t *blimit,
|
||||||
|
const uint8_t *limit,
|
||||||
|
const uint8_t *thresh) {
|
||||||
|
mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch,
|
||||||
|
const uint8_t *blimit,
|
||||||
|
const uint8_t *limit,
|
||||||
|
const uint8_t *thresh) {
|
||||||
|
mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2);
|
||||||
|
}
|
||||||
#endif // #if HAVE_DSPR2
|
#endif // #if HAVE_DSPR2
|
||||||
|
@ -535,31 +535,35 @@ add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8
|
|||||||
specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
|
||||||
$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
|
$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||||
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
|
||||||
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
|
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||||
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
|
||||||
$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
|
$vpx_lpf_horizontal_edge_8_neon_asm=vpx_lpf_horizontal_edge_8_neon;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
|
specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
|
||||||
|
$vpx_lpf_horizontal_edge_16_neon_asm=vpx_lpf_horizontal_edge_16_neon;
|
||||||
|
|
||||||
|
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||||
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
|
||||||
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
|
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||||
@ -572,28 +576,31 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
|
specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
|
add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
|
specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
|
specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
|
add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
|
specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
|
specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
|
add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
|
specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
|
add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
|
specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/;
|
||||||
|
|
||||||
|
add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
|
specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
|
specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
|
add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
|
specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
||||||
|
@ -51,12 +51,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
|
|||||||
|
|
||||||
// TODO(debargha, peter): Break up large functions into smaller ones
|
// TODO(debargha, peter): Break up large functions into smaller ones
|
||||||
// in this file.
|
// in this file.
|
||||||
static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s,
|
void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
|
||||||
int p,
|
|
||||||
const uint8_t *_blimit,
|
const uint8_t *_blimit,
|
||||||
const uint8_t *_limit,
|
const uint8_t *_limit,
|
||||||
const uint8_t *_thresh,
|
const uint8_t *_thresh, int bd) {
|
||||||
int bd) {
|
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i one = _mm_set1_epi16(1);
|
const __m128i one = _mm_set1_epi16(1);
|
||||||
__m128i blimit, limit, thresh;
|
__m128i blimit, limit, thresh;
|
||||||
@ -496,34 +494,19 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s,
|
|||||||
_mm_store_si128((__m128i *)(s - 0 * p), q0);
|
_mm_store_si128((__m128i *)(s - 0 * p), q0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s,
|
void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p,
|
||||||
int p,
|
|
||||||
const uint8_t *_blimit,
|
const uint8_t *_blimit,
|
||||||
const uint8_t *_limit,
|
const uint8_t *_limit,
|
||||||
const uint8_t *_thresh,
|
const uint8_t *_thresh, int bd) {
|
||||||
int bd) {
|
vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd);
|
||||||
highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd);
|
vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
|
||||||
highbd_mb_lpf_horizontal_edge_w_sse2_8(s + 8, p, _blimit, _limit, _thresh,
|
|
||||||
bd);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
|
|
||||||
void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
|
|
||||||
const uint8_t *_blimit,
|
|
||||||
const uint8_t *_limit,
|
|
||||||
const uint8_t *_thresh,
|
|
||||||
int count, int bd) {
|
|
||||||
if (count == 1)
|
|
||||||
highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd);
|
|
||||||
else
|
|
||||||
highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
|
void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
|
||||||
const uint8_t *_blimit,
|
const uint8_t *_blimit,
|
||||||
const uint8_t *_limit,
|
const uint8_t *_limit,
|
||||||
const uint8_t *_thresh,
|
const uint8_t *_thresh,
|
||||||
int count, int bd) {
|
int bd) {
|
||||||
DECLARE_ALIGNED(16, uint16_t, flat_op2[16]);
|
DECLARE_ALIGNED(16, uint16_t, flat_op2[16]);
|
||||||
DECLARE_ALIGNED(16, uint16_t, flat_op1[16]);
|
DECLARE_ALIGNED(16, uint16_t, flat_op1[16]);
|
||||||
DECLARE_ALIGNED(16, uint16_t, flat_op0[16]);
|
DECLARE_ALIGNED(16, uint16_t, flat_op0[16]);
|
||||||
@ -556,8 +539,6 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
|
|||||||
__m128i work_a;
|
__m128i work_a;
|
||||||
__m128i filter1, filter2;
|
__m128i filter1, filter2;
|
||||||
|
|
||||||
(void)count;
|
|
||||||
|
|
||||||
if (bd == 8) {
|
if (bd == 8) {
|
||||||
blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
|
blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
|
||||||
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
|
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
|
||||||
@ -764,16 +745,15 @@ void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
|
|||||||
const uint8_t *_limit1,
|
const uint8_t *_limit1,
|
||||||
const uint8_t *_thresh1,
|
const uint8_t *_thresh1,
|
||||||
int bd) {
|
int bd) {
|
||||||
vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
|
vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
|
||||||
vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
|
vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
|
||||||
1, bd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
|
void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
|
||||||
const uint8_t *_blimit,
|
const uint8_t *_blimit,
|
||||||
const uint8_t *_limit,
|
const uint8_t *_limit,
|
||||||
const uint8_t *_thresh,
|
const uint8_t *_thresh,
|
||||||
int count, int bd) {
|
int bd) {
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
__m128i blimit, limit, thresh;
|
__m128i blimit, limit, thresh;
|
||||||
__m128i mask, hev, flat;
|
__m128i mask, hev, flat;
|
||||||
@ -813,8 +793,6 @@ void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
|
|||||||
__m128i work_a;
|
__m128i work_a;
|
||||||
__m128i filter1, filter2;
|
__m128i filter1, filter2;
|
||||||
|
|
||||||
(void)count;
|
|
||||||
|
|
||||||
if (bd == 8) {
|
if (bd == 8) {
|
||||||
blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
|
blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
|
||||||
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
|
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
|
||||||
@ -944,9 +922,8 @@ void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
|
|||||||
const uint8_t *_limit1,
|
const uint8_t *_limit1,
|
||||||
const uint8_t *_thresh1,
|
const uint8_t *_thresh1,
|
||||||
int bd) {
|
int bd) {
|
||||||
vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
|
vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
|
||||||
vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
|
vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
|
||||||
bd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void highbd_transpose(uint16_t *src[], int in_p,
|
static INLINE void highbd_transpose(uint16_t *src[], int in_p,
|
||||||
@ -1058,11 +1035,10 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
|
|||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh,
|
||||||
int count, int bd) {
|
int bd) {
|
||||||
DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]);
|
DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]);
|
||||||
uint16_t *src[1];
|
uint16_t *src[1];
|
||||||
uint16_t *dst[1];
|
uint16_t *dst[1];
|
||||||
(void)count;
|
|
||||||
|
|
||||||
// Transpose 8x8
|
// Transpose 8x8
|
||||||
src[0] = s - 4;
|
src[0] = s - 4;
|
||||||
@ -1071,8 +1047,7 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
|
|||||||
highbd_transpose(src, p, dst, 8, 1);
|
highbd_transpose(src, p, dst, 8, 1);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
|
vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd);
|
||||||
bd);
|
|
||||||
|
|
||||||
src[0] = t_dst;
|
src[0] = t_dst;
|
||||||
dst[0] = s - 4;
|
dst[0] = s - 4;
|
||||||
@ -1112,11 +1087,10 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
|
|||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh,
|
||||||
int count, int bd) {
|
int bd) {
|
||||||
DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]);
|
DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]);
|
||||||
uint16_t *src[1];
|
uint16_t *src[1];
|
||||||
uint16_t *dst[1];
|
uint16_t *dst[1];
|
||||||
(void)count;
|
|
||||||
|
|
||||||
// Transpose 8x8
|
// Transpose 8x8
|
||||||
src[0] = s - 4;
|
src[0] = s - 4;
|
||||||
@ -1125,8 +1099,7 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
|
|||||||
highbd_transpose(src, p, dst, 8, 1);
|
highbd_transpose(src, p, dst, 8, 1);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
|
vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd);
|
||||||
bd);
|
|
||||||
|
|
||||||
src[0] = t_dst;
|
src[0] = t_dst;
|
||||||
dst[0] = s - 4;
|
dst[0] = s - 4;
|
||||||
@ -1181,7 +1154,7 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
|
|||||||
highbd_transpose(src, p, dst, 8, 2);
|
highbd_transpose(src, p, dst, 8, 2);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
highbd_mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit,
|
vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit,
|
||||||
thresh, bd);
|
thresh, bd);
|
||||||
src[0] = t_dst;
|
src[0] = t_dst;
|
||||||
src[1] = t_dst + 8 * 8;
|
src[1] = t_dst + 8 * 8;
|
||||||
@ -1205,7 +1178,7 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
|
|||||||
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
highbd_mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit,
|
vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit,
|
||||||
thresh, bd);
|
thresh, bd);
|
||||||
|
|
||||||
// Transpose back
|
// Transpose back
|
||||||
|
@ -13,8 +13,9 @@
|
|||||||
#include "./vpx_dsp_rtcd.h"
|
#include "./vpx_dsp_rtcd.h"
|
||||||
#include "vpx_ports/mem.h"
|
#include "vpx_ports/mem.h"
|
||||||
|
|
||||||
static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
|
void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p,
|
||||||
const unsigned char *_blimit, const unsigned char *_limit,
|
const unsigned char *_blimit,
|
||||||
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh) {
|
const unsigned char *_thresh) {
|
||||||
__m128i mask, hev, flat, flat2;
|
__m128i mask, hev, flat, flat2;
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
@ -400,8 +401,9 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = {
|
|||||||
8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
|
8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
|
||||||
};
|
};
|
||||||
|
|
||||||
static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
|
void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p,
|
||||||
const unsigned char *_blimit, const unsigned char *_limit,
|
const unsigned char *_blimit,
|
||||||
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh) {
|
const unsigned char *_thresh) {
|
||||||
__m128i mask, hev, flat, flat2;
|
__m128i mask, hev, flat, flat2;
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
@ -975,12 +977,3 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
|
|||||||
_mm_storeu_si128((__m128i *) (s + 6 * p), q6);
|
_mm_storeu_si128((__m128i *) (s + 6 * p), q6);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
|
|
||||||
const unsigned char *_blimit, const unsigned char *_limit,
|
|
||||||
const unsigned char *_thresh, int count) {
|
|
||||||
if (count == 1)
|
|
||||||
mb_lpf_horizontal_edge_w_avx2_8(s, p, _blimit, _limit, _thresh);
|
|
||||||
else
|
|
||||||
mb_lpf_horizontal_edge_w_avx2_16(s, p, _blimit, _limit, _thresh);
|
|
||||||
}
|
|
||||||
|
@ -18,14 +18,13 @@
|
|||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *blimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
|
global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
|
||||||
sym(vpx_lpf_horizontal_4_mmx):
|
sym(vpx_lpf_horizontal_4_mmx):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 5
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
@ -39,8 +38,6 @@ sym(vpx_lpf_horizontal_4_mmx):
|
|||||||
mov rsi, arg(0) ;src_ptr
|
mov rsi, arg(0) ;src_ptr
|
||||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||||
|
|
||||||
movsxd rcx, dword ptr arg(5) ;count
|
|
||||||
.next8_h:
|
|
||||||
mov rdx, arg(3) ;limit
|
mov rdx, arg(3) ;limit
|
||||||
movq mm7, [rdx]
|
movq mm7, [rdx]
|
||||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||||
@ -208,11 +205,6 @@ sym(vpx_lpf_horizontal_4_mmx):
|
|||||||
pxor mm7, [GLOBAL(t80)] ; unoffset
|
pxor mm7, [GLOBAL(t80)] ; unoffset
|
||||||
movq [rdi], mm7 ; write back
|
movq [rdi], mm7 ; write back
|
||||||
|
|
||||||
add rsi,8
|
|
||||||
neg rax
|
|
||||||
dec rcx
|
|
||||||
jnz .next8_h
|
|
||||||
|
|
||||||
add rsp, 32
|
add rsp, 32
|
||||||
pop rsp
|
pop rsp
|
||||||
; begin epilog
|
; begin epilog
|
||||||
@ -230,14 +222,13 @@ sym(vpx_lpf_horizontal_4_mmx):
|
|||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *blimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vpx_lpf_vertical_4_mmx) PRIVATE
|
global sym(vpx_lpf_vertical_4_mmx) PRIVATE
|
||||||
sym(vpx_lpf_vertical_4_mmx):
|
sym(vpx_lpf_vertical_4_mmx):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 5
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
@ -254,8 +245,6 @@ sym(vpx_lpf_vertical_4_mmx):
|
|||||||
|
|
||||||
lea rsi, [rsi + rax*4 - 4]
|
lea rsi, [rsi + rax*4 - 4]
|
||||||
|
|
||||||
movsxd rcx, dword ptr arg(5) ;count
|
|
||||||
.next8_v:
|
|
||||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||||
add rdi, rax
|
add rdi, rax
|
||||||
|
|
||||||
@ -579,10 +568,6 @@ sym(vpx_lpf_vertical_4_mmx):
|
|||||||
|
|
||||||
movd [rdi+rax*2+2], mm5
|
movd [rdi+rax*2+2], mm5
|
||||||
|
|
||||||
lea rsi, [rsi+rax*8]
|
|
||||||
dec rcx
|
|
||||||
jnz .next8_v
|
|
||||||
|
|
||||||
add rsp, 64
|
add rsp, 64
|
||||||
pop rsp
|
pop rsp
|
||||||
; begin epilog
|
; begin epilog
|
||||||
|
@ -18,8 +18,7 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) {
|
|||||||
return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
|
return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
|
void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
|
||||||
int p,
|
|
||||||
const unsigned char *_blimit,
|
const unsigned char *_blimit,
|
||||||
const unsigned char *_limit,
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh) {
|
const unsigned char *_thresh) {
|
||||||
@ -383,8 +382,7 @@ static INLINE __m128i filter16_mask(const __m128i *const flat,
|
|||||||
return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
|
return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
|
void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
|
||||||
int p,
|
|
||||||
const unsigned char *_blimit,
|
const unsigned char *_blimit,
|
||||||
const unsigned char *_limit,
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh) {
|
const unsigned char *_thresh) {
|
||||||
@ -716,21 +714,10 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
|
|
||||||
void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
|
|
||||||
const unsigned char *_blimit,
|
|
||||||
const unsigned char *_limit,
|
|
||||||
const unsigned char *_thresh, int count) {
|
|
||||||
if (count == 1)
|
|
||||||
mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
|
|
||||||
else
|
|
||||||
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
|
|
||||||
}
|
|
||||||
|
|
||||||
void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
|
void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
|
||||||
const unsigned char *_blimit,
|
const unsigned char *_blimit,
|
||||||
const unsigned char *_limit,
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh, int count) {
|
const unsigned char *_thresh) {
|
||||||
DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);
|
DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);
|
||||||
DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
|
DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
|
||||||
DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
|
DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
|
||||||
@ -745,8 +732,6 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
|
|||||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||||
__m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0;
|
__m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0;
|
||||||
|
|
||||||
(void)count;
|
|
||||||
|
|
||||||
q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)),
|
q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)),
|
||||||
_mm_loadl_epi64((__m128i *)(s + 3 * p)));
|
_mm_loadl_epi64((__m128i *)(s + 3 * p)));
|
||||||
q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)),
|
q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)),
|
||||||
@ -1492,11 +1477,10 @@ void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
|
|||||||
void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
|
void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
|
||||||
const unsigned char *blimit,
|
const unsigned char *blimit,
|
||||||
const unsigned char *limit,
|
const unsigned char *limit,
|
||||||
const unsigned char *thresh, int count) {
|
const unsigned char *thresh) {
|
||||||
DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]);
|
DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]);
|
||||||
unsigned char *src[1];
|
unsigned char *src[1];
|
||||||
unsigned char *dst[1];
|
unsigned char *dst[1];
|
||||||
(void)count;
|
|
||||||
|
|
||||||
// Transpose 8x8
|
// Transpose 8x8
|
||||||
src[0] = s - 4;
|
src[0] = s - 4;
|
||||||
@ -1505,7 +1489,7 @@ void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
|
|||||||
transpose(src, p, dst, 8, 1);
|
transpose(src, p, dst, 8, 1);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
|
vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh);
|
||||||
|
|
||||||
src[0] = t_dst;
|
src[0] = t_dst;
|
||||||
dst[0] = s - 4;
|
dst[0] = s - 4;
|
||||||
@ -1557,7 +1541,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
|
|||||||
transpose(src, p, dst, 8, 2);
|
transpose(src, p, dst, 8, 2);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, thresh);
|
vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
|
||||||
|
|
||||||
src[0] = t_dst;
|
src[0] = t_dst;
|
||||||
src[1] = t_dst + 8 * 8;
|
src[1] = t_dst + 8 * 8;
|
||||||
@ -1578,8 +1562,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
|
|||||||
transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit,
|
vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
|
||||||
thresh);
|
|
||||||
|
|
||||||
// Transpose back
|
// Transpose back
|
||||||
transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
|
transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user