Rename loop filter function from vp9_ to vpx_

Change-Id: I6f424bb8daec26bf8482b5d75dd9b0e45c11a665
This commit is contained in:
Jingning Han 2015-07-17 12:31:53 -07:00
parent 4735edd00f
commit 2992739b5d
42 changed files with 904 additions and 904 deletions

View File

@ -60,49 +60,49 @@ typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
}
#else
void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
@ -114,25 +114,25 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON_ASM
@ -141,13 +141,13 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
@ -534,46 +534,46 @@ using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
&vp9_highbd_lpf_horizontal_4_c, 8, 1),
make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
&vp9_highbd_lpf_vertical_4_c, 8, 1),
make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
&vp9_highbd_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 8, 2),
make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
&vp9_highbd_lpf_vertical_8_c, 8, 1),
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
&vpx_highbd_lpf_horizontal_4_c, 8, 1),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
&vpx_highbd_lpf_vertical_4_c, 8, 1),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
&vpx_highbd_lpf_horizontal_8_c, 8, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
&vpx_highbd_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
&vpx_highbd_lpf_horizontal_16_c, 8, 2),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
&vpx_highbd_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 8, 1),
make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
&vp9_highbd_lpf_horizontal_4_c, 10, 1),
make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
&vp9_highbd_lpf_vertical_4_c, 10, 1),
make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
&vp9_highbd_lpf_horizontal_8_c, 10, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 10, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 10, 2),
make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
&vp9_highbd_lpf_vertical_8_c, 10, 1),
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
&vpx_highbd_lpf_horizontal_4_c, 10, 1),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
&vpx_highbd_lpf_vertical_4_c, 10, 1),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
&vpx_highbd_lpf_horizontal_8_c, 10, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
&vpx_highbd_lpf_horizontal_16_c, 10, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
&vpx_highbd_lpf_horizontal_16_c, 10, 2),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
&vpx_highbd_lpf_vertical_8_c, 10, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 10, 1),
make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
&vp9_highbd_lpf_horizontal_4_c, 12, 1),
make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
&vp9_highbd_lpf_vertical_4_c, 12, 1),
make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
&vp9_highbd_lpf_horizontal_8_c, 12, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 12, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 12, 2),
make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
&vp9_highbd_lpf_vertical_8_c, 12, 1),
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
&vpx_highbd_lpf_horizontal_4_c, 12, 1),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
&vpx_highbd_lpf_vertical_4_c, 12, 1),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
&vpx_highbd_lpf_horizontal_8_c, 12, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
&vpx_highbd_lpf_horizontal_16_c, 12, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
&vpx_highbd_lpf_horizontal_16_c, 12, 2),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
&vpx_highbd_lpf_vertical_8_c, 12, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 12, 1),
make_tuple(&wrapper_vertical_16_dual_sse2,
@ -586,10 +586,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2),
make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@ -598,8 +598,8 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
AVX2, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8,
make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
2)));
#endif
@ -608,42 +608,42 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test9Param,
::testing::Values(
make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
&vp9_highbd_lpf_horizontal_4_dual_c, 8),
make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
&vp9_highbd_lpf_horizontal_8_dual_c, 8),
make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
&vp9_highbd_lpf_vertical_4_dual_c, 8),
make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
&vp9_highbd_lpf_vertical_8_dual_c, 8),
make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
&vp9_highbd_lpf_horizontal_4_dual_c, 10),
make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
&vp9_highbd_lpf_horizontal_8_dual_c, 10),
make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
&vp9_highbd_lpf_vertical_4_dual_c, 10),
make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
&vp9_highbd_lpf_vertical_8_dual_c, 10),
make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
&vp9_highbd_lpf_horizontal_4_dual_c, 12),
make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
&vp9_highbd_lpf_horizontal_8_dual_c, 12),
make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
&vp9_highbd_lpf_vertical_4_dual_c, 12),
make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
&vp9_highbd_lpf_vertical_8_dual_c, 12)));
make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
&vpx_highbd_lpf_horizontal_4_dual_c, 8),
make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
&vpx_highbd_lpf_horizontal_8_dual_c, 8),
make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
&vpx_highbd_lpf_vertical_4_dual_c, 8),
make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
&vpx_highbd_lpf_vertical_8_dual_c, 8),
make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
&vpx_highbd_lpf_horizontal_4_dual_c, 10),
make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
&vpx_highbd_lpf_horizontal_8_dual_c, 10),
make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
&vpx_highbd_lpf_vertical_4_dual_c, 10),
make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
&vpx_highbd_lpf_vertical_8_dual_c, 10),
make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
&vpx_highbd_lpf_horizontal_4_dual_c, 12),
make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
&vpx_highbd_lpf_horizontal_8_dual_c, 12),
make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
&vpx_highbd_lpf_vertical_4_dual_c, 12),
make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
&vpx_highbd_lpf_vertical_8_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test9Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
&vp9_lpf_horizontal_4_dual_c, 8),
make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
&vp9_lpf_horizontal_8_dual_c, 8),
make_tuple(&vp9_lpf_vertical_4_dual_sse2,
&vp9_lpf_vertical_4_dual_c, 8),
make_tuple(&vp9_lpf_vertical_8_dual_sse2,
&vp9_lpf_vertical_8_dual_c, 8)));
make_tuple(&vpx_lpf_horizontal_4_dual_sse2,
&vpx_lpf_horizontal_4_dual_c, 8),
make_tuple(&vpx_lpf_horizontal_8_dual_sse2,
&vpx_lpf_horizontal_8_dual_c, 8),
make_tuple(&vpx_lpf_vertical_4_dual_sse2,
&vpx_lpf_vertical_4_dual_c, 8),
make_tuple(&vpx_lpf_vertical_8_dual_sse2,
&vpx_lpf_vertical_8_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@ -657,36 +657,36 @@ INSTANTIATE_TEST_CASE_P(
#if HAVE_NEON_ASM
// Using #if inside the macro is unsupported on MSVS but the tests are not
// currently built for MSVS with ARM and NEON.
make_tuple(&vp9_lpf_horizontal_16_neon,
&vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_neon,
&vp9_lpf_horizontal_16_c, 8, 2),
make_tuple(&vpx_lpf_horizontal_16_neon,
&vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_neon,
&vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&wrapper_vertical_16_neon,
&wrapper_vertical_16_c, 8, 1),
make_tuple(&wrapper_vertical_16_dual_neon,
&wrapper_vertical_16_dual_c, 8, 1),
#endif // HAVE_NEON_ASM
make_tuple(&vp9_lpf_horizontal_8_neon,
&vp9_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_lpf_vertical_8_neon,
&vp9_lpf_vertical_8_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_4_neon,
&vp9_lpf_horizontal_4_c, 8, 1),
make_tuple(&vp9_lpf_vertical_4_neon,
&vp9_lpf_vertical_4_c, 8, 1)));
make_tuple(&vpx_lpf_horizontal_8_neon,
&vpx_lpf_horizontal_8_c, 8, 1),
make_tuple(&vpx_lpf_vertical_8_neon,
&vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_4_neon,
&vpx_lpf_horizontal_4_c, 8, 1),
make_tuple(&vpx_lpf_vertical_4_neon,
&vpx_lpf_vertical_4_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param,
::testing::Values(
#if HAVE_NEON_ASM
make_tuple(&vp9_lpf_horizontal_8_dual_neon,
&vp9_lpf_horizontal_8_dual_c, 8),
make_tuple(&vp9_lpf_vertical_8_dual_neon,
&vp9_lpf_vertical_8_dual_c, 8),
make_tuple(&vpx_lpf_horizontal_8_dual_neon,
&vpx_lpf_horizontal_8_dual_c, 8),
make_tuple(&vpx_lpf_vertical_8_dual_neon,
&vpx_lpf_vertical_8_dual_c, 8),
#endif // HAVE_NEON_ASM
make_tuple(&vp9_lpf_horizontal_4_dual_neon,
&vp9_lpf_horizontal_4_dual_c, 8),
make_tuple(&vp9_lpf_vertical_4_dual_neon,
&vp9_lpf_vertical_4_dual_c, 8)));
make_tuple(&vpx_lpf_horizontal_4_dual_neon,
&vpx_lpf_horizontal_4_dual_c, 8),
make_tuple(&vpx_lpf_vertical_4_dual_neon,
&vpx_lpf_vertical_4_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
@ -694,23 +694,23 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test9Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_4_dual_msa,
&vp9_lpf_horizontal_4_dual_c, 8),
make_tuple(&vp9_lpf_horizontal_8_dual_msa,
&vp9_lpf_horizontal_8_dual_c, 8),
make_tuple(&vp9_lpf_vertical_4_dual_msa,
&vp9_lpf_vertical_4_dual_c, 8),
make_tuple(&vp9_lpf_vertical_8_dual_msa,
&vp9_lpf_vertical_8_dual_c, 8)));
make_tuple(&vpx_lpf_horizontal_4_dual_msa,
&vpx_lpf_horizontal_4_dual_c, 8),
make_tuple(&vpx_lpf_horizontal_8_dual_msa,
&vpx_lpf_horizontal_8_dual_c, 8),
make_tuple(&vpx_lpf_vertical_4_dual_msa,
&vpx_lpf_vertical_4_dual_c, 8),
make_tuple(&vpx_lpf_vertical_8_dual_msa,
&vpx_lpf_vertical_8_dual_c, 8)));
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
} // namespace

View File

@ -91,6 +91,7 @@ endif
## shared library builds don't make these functions accessible.
##
ifeq ($(CONFIG_SHARED),)
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += lpf_8_test.cc
## VP8
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
@ -142,7 +143,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc

View File

@ -44,7 +44,7 @@ static void convolve_bi_avg_vert_4_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@ -148,8 +148,8 @@ static void convolve_bi_avg_vert_64_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_store(dst + dst_stride);
vp9_prefetch_store(dst + dst_stride + 32);
prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@ -245,7 +245,7 @@ void vp9_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
: [pos] "r" (pos)
);
vp9_prefetch_store(dst);
prefetch_store(dst);
switch (w) {
case 4:
@ -257,7 +257,7 @@ void vp9_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_y, w, h);
break;
case 64:
vp9_prefetch_store(dst + 32);
prefetch_store(dst + 32);
convolve_bi_avg_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);

View File

@ -40,9 +40,9 @@ static void convolve_bi_avg_horiz_4_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -135,9 +135,9 @@ static void convolve_bi_avg_horiz_8_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -290,9 +290,9 @@ static void convolve_bi_avg_horiz_16_dspr2(const uint8_t *src_ptr,
dst = dst_ptr;
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_store(dst_ptr + dst_stride);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@ -539,11 +539,11 @@ static void convolve_bi_avg_horiz_64_dspr2(const uint8_t *src_ptr,
dst = dst_ptr;
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_load(src_ptr + src_stride + 64);
vp9_prefetch_store(dst_ptr + dst_stride);
vp9_prefetch_store(dst_ptr + dst_stride + 32);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_load(src_ptr + src_stride + 64);
prefetch_store(dst_ptr + dst_stride);
prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@ -781,9 +781,9 @@ void vp9_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
);
/* prefetch data to cache memory */
vp9_prefetch_load(src);
vp9_prefetch_load(src + 32);
vp9_prefetch_store(dst);
prefetch_load(src);
prefetch_load(src + 32);
prefetch_store(dst);
switch (w) {
case 4:
@ -807,8 +807,8 @@ void vp9_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_x, h, 2);
break;
case 64:
vp9_prefetch_load(src + 64);
vp9_prefetch_store(dst + 32);
prefetch_load(src + 64);
prefetch_store(dst + 32);
convolve_bi_avg_horiz_64_dspr2(src, src_stride,
dst, dst_stride,

View File

@ -41,8 +41,8 @@ static void convolve_bi_horiz_4_transposed_dspr2(const uint8_t *src,
for (y = h; y--;) {
dst_ptr = dst;
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -132,8 +132,8 @@ static void convolve_bi_horiz_8_transposed_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
dst_ptr = dst;
odd_dst = (dst_ptr + dst_stride);
@ -272,8 +272,8 @@ static void convolve_bi_horiz_16_transposed_dspr2(const uint8_t *src_ptr,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
src = src_ptr;
dst = dst_ptr;
@ -504,9 +504,9 @@ static void convolve_bi_horiz_64_transposed_dspr2(const uint8_t *src_ptr,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_load(src_ptr + src_stride + 64);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_load(src_ptr + src_stride + 64);
src = src_ptr;
dst = dst_ptr;
@ -747,8 +747,8 @@ void vp9_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride,
);
/* prefetch data to cache memory */
vp9_prefetch_load(src);
vp9_prefetch_load(src + 32);
prefetch_load(src);
prefetch_load(src + 32);
switch (w) {
case 4:
@ -769,7 +769,7 @@ void vp9_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride,
(w/16));
break;
case 64:
vp9_prefetch_load(src + 32);
prefetch_load(src + 32);
convolve_bi_horiz_64_transposed_dspr2(src, src_stride,
dst, dst_stride,
filter, h);

View File

@ -39,9 +39,9 @@ static void convolve_bi_horiz_4_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -122,9 +122,9 @@ static void convolve_bi_horiz_8_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -252,9 +252,9 @@ static void convolve_bi_horiz_16_dspr2(const uint8_t *src_ptr,
dst = dst_ptr;
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_store(dst_ptr + dst_stride);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@ -459,11 +459,11 @@ static void convolve_bi_horiz_64_dspr2(const uint8_t *src_ptr,
dst = dst_ptr;
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_load(src_ptr + src_stride + 64);
vp9_prefetch_store(dst_ptr + dst_stride);
vp9_prefetch_store(dst_ptr + dst_stride + 32);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_load(src_ptr + src_stride + 64);
prefetch_store(dst_ptr + dst_stride);
prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@ -651,7 +651,7 @@ void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
if (16 == x_step_q4) {
uint32_t pos = 38;
vp9_prefetch_load((const uint8_t *)filter_x);
prefetch_load((const uint8_t *)filter_x);
/* bit position for extract from acc */
__asm__ __volatile__ (
@ -661,9 +661,9 @@ void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
);
/* prefetch data to cache memory */
vp9_prefetch_load(src);
vp9_prefetch_load(src + 32);
vp9_prefetch_store(dst);
prefetch_load(src);
prefetch_load(src + 32);
prefetch_store(dst);
switch (w) {
case 4:
@ -687,8 +687,8 @@ void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_x, (int32_t)h, 2);
break;
case 64:
vp9_prefetch_load(src + 64);
vp9_prefetch_store(dst + 32);
prefetch_load(src + 64);
prefetch_store(dst + 32);
convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride,
dst, (int32_t)dst_stride,

View File

@ -44,7 +44,7 @@ static void convolve_bi_vert_4_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@ -141,7 +141,7 @@ static void convolve_bi_vert_64_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@ -230,7 +230,7 @@ void vp9_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
: [pos] "r" (pos)
);
vp9_prefetch_store(dst);
prefetch_store(dst);
switch (w) {
case 4 :
@ -242,7 +242,7 @@ void vp9_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_y, w, h);
break;
case 64 :
vp9_prefetch_store(dst + 32);
prefetch_store(dst + 32);
convolve_bi_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);

View File

@ -49,7 +49,7 @@ static void convolve_avg_vert_4_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@ -210,8 +210,8 @@ static void convolve_avg_vert_64_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_store(dst + dst_stride);
vp9_prefetch_store(dst + dst_stride + 32);
prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@ -372,7 +372,7 @@ void vp9_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
: [pos] "r" (pos)
);
vp9_prefetch_store(dst);
prefetch_store(dst);
switch (w) {
case 4:
@ -384,7 +384,7 @@ void vp9_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_y, w, h);
break;
case 64:
vp9_prefetch_store(dst + 32);
prefetch_store(dst + 32);
convolve_avg_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
@ -452,17 +452,17 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint32_t tp3, tp4, tn2;
/* prefetch data to cache memory */
vp9_prefetch_load(src);
vp9_prefetch_load(src + 32);
vp9_prefetch_store(dst);
prefetch_load(src);
prefetch_load(src + 32);
prefetch_store(dst);
switch (w) {
case 4:
/* 1 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -482,9 +482,9 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
case 8:
/* 2 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -509,9 +509,9 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
case 16:
/* 4 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -544,9 +544,9 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
case 32:
/* 8 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -593,16 +593,16 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
}
break;
case 64:
vp9_prefetch_load(src + 64);
vp9_prefetch_store(dst + 32);
prefetch_load(src + 64);
prefetch_store(dst + 32);
/* 16 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_load(src + src_stride + 64);
vp9_prefetch_store(dst + dst_stride);
vp9_prefetch_store(dst + dst_stride + 32);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_load(src + src_stride + 64);
prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"

View File

@ -43,9 +43,9 @@ static void convolve_avg_horiz_4_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -165,9 +165,9 @@ static void convolve_avg_horiz_8_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -357,9 +357,9 @@ static void convolve_avg_horiz_16_dspr2(const uint8_t *src_ptr,
dst = dst_ptr;
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_store(dst_ptr + dst_stride);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@ -668,11 +668,11 @@ static void convolve_avg_horiz_64_dspr2(const uint8_t *src_ptr,
dst = dst_ptr;
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_load(src_ptr + src_stride + 64);
vp9_prefetch_store(dst_ptr + dst_stride);
vp9_prefetch_store(dst_ptr + dst_stride + 32);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_load(src_ptr + src_stride + 64);
prefetch_store(dst_ptr + dst_stride);
prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@ -985,9 +985,9 @@ void vp9_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
);
/* prefetch data to cache memory */
vp9_prefetch_load(src);
vp9_prefetch_load(src + 32);
vp9_prefetch_store(dst);
prefetch_load(src);
prefetch_load(src + 32);
prefetch_store(dst);
switch (w) {
case 4:
@ -1011,8 +1011,8 @@ void vp9_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_x, h, 2);
break;
case 64:
vp9_prefetch_load(src + 64);
vp9_prefetch_store(dst + 32);
prefetch_load(src + 64);
prefetch_store(dst + 32);
convolve_avg_horiz_64_dspr2(src, src_stride,
dst, dst_stride,

View File

@ -60,8 +60,8 @@ static void convolve_horiz_4_transposed_dspr2(const uint8_t *src,
for (y = h; y--;) {
dst_ptr = dst;
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -176,8 +176,8 @@ static void convolve_horiz_8_transposed_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
dst_ptr = dst;
odd_dst = (dst_ptr + dst_stride);
@ -355,8 +355,8 @@ static void convolve_horiz_16_transposed_dspr2(const uint8_t *src_ptr,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
src = src_ptr;
dst = dst_ptr;
@ -645,9 +645,9 @@ static void convolve_horiz_64_transposed_dspr2(const uint8_t *src_ptr,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_load(src_ptr + src_stride + 64);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_load(src_ptr + src_stride + 64);
src = src_ptr;
dst = dst_ptr;
@ -993,8 +993,8 @@ void vp9_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride,
src -= (src_stride * 3 + 3);
/* prefetch data to cache memory */
vp9_prefetch_load(src);
vp9_prefetch_load(src + 32);
prefetch_load(src);
prefetch_load(src + 32);
switch (w) {
case 4:
@ -1015,7 +1015,7 @@ void vp9_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride,
(w/16));
break;
case 64:
vp9_prefetch_load(src + 32);
prefetch_load(src + 32);
convolve_horiz_64_transposed_dspr2(src, src_stride,
temp, intermediate_height,
filter_x, intermediate_height);
@ -1078,9 +1078,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
int x, y;
/* prefetch data to cache memory */
vp9_prefetch_load(src);
vp9_prefetch_load(src + 32);
vp9_prefetch_store(dst);
prefetch_load(src);
prefetch_load(src + 32);
prefetch_store(dst);
switch (w) {
case 4:
@ -1089,9 +1089,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
/* 1 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], (%[src]) \n\t"
@ -1112,9 +1112,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
/* 2 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -1137,9 +1137,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
/* 4 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -1169,9 +1169,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
/* 8 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -1209,16 +1209,16 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint32_t tp1, tp2, tp3, tp4;
uint32_t tp5, tp6, tp7, tp8;
vp9_prefetch_load(src + 64);
vp9_prefetch_store(dst + 32);
prefetch_load(src + 64);
prefetch_store(dst + 32);
/* 16 word storage */
for (y = h; y--; ) {
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_load(src + src_stride + 64);
vp9_prefetch_store(dst + dst_stride);
vp9_prefetch_store(dst + dst_stride + 32);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_load(src + src_stride + 64);
prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"

View File

@ -43,9 +43,9 @@ static void convolve_horiz_4_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -154,9 +154,9 @@ static void convolve_horiz_8_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_load(src + src_stride);
vp9_prefetch_load(src + src_stride + 32);
vp9_prefetch_store(dst + dst_stride);
prefetch_load(src + src_stride);
prefetch_load(src + src_stride + 32);
prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@ -323,9 +323,9 @@ static void convolve_horiz_16_dspr2(const uint8_t *src_ptr,
dst = dst_ptr;
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_store(dst_ptr + dst_stride);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@ -593,11 +593,11 @@ static void convolve_horiz_64_dspr2(const uint8_t *src_ptr,
dst = dst_ptr;
/* prefetch data to cache memory */
vp9_prefetch_load(src_ptr + src_stride);
vp9_prefetch_load(src_ptr + src_stride + 32);
vp9_prefetch_load(src_ptr + src_stride + 64);
vp9_prefetch_store(dst_ptr + dst_stride);
vp9_prefetch_store(dst_ptr + dst_stride + 32);
prefetch_load(src_ptr + src_stride);
prefetch_load(src_ptr + src_stride + 32);
prefetch_load(src_ptr + src_stride + 64);
prefetch_store(dst_ptr + dst_stride);
prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@ -859,7 +859,7 @@ void vp9_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
if (16 == x_step_q4) {
uint32_t pos = 38;
vp9_prefetch_load((const uint8_t *)filter_x);
prefetch_load((const uint8_t *)filter_x);
src -= 3;
/* bit positon for extract from acc */
@ -870,9 +870,9 @@ void vp9_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
);
/* prefetch data to cache memory */
vp9_prefetch_load(src);
vp9_prefetch_load(src + 32);
vp9_prefetch_store(dst);
prefetch_load(src);
prefetch_load(src + 32);
prefetch_store(dst);
switch (w) {
case 4:
@ -896,8 +896,8 @@ void vp9_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_x, (int32_t)h, 2);
break;
case 64:
vp9_prefetch_load(src + 64);
vp9_prefetch_store(dst + 32);
prefetch_load(src + 64);
prefetch_store(dst + 32);
convolve_horiz_64_dspr2(src, (int32_t)src_stride,
dst, (int32_t)dst_stride,

View File

@ -49,7 +49,7 @@ static void convolve_vert_4_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@ -203,8 +203,8 @@ static void convolve_vert_64_dspr2(const uint8_t *src,
for (y = h; y--;) {
/* prefetch data to cache memory */
vp9_prefetch_store(dst + dst_stride);
vp9_prefetch_store(dst + dst_stride + 32);
prefetch_store(dst + dst_stride);
prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@ -358,7 +358,7 @@ void vp9_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
: [pos] "r" (pos)
);
vp9_prefetch_store(dst);
prefetch_store(dst);
switch (w) {
case 4 :
@ -370,7 +370,7 @@ void vp9_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_y, w, h);
break;
case 64 :
vp9_prefetch_store(dst + 32);
prefetch_store(dst + 32);
convolve_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);

View File

@ -34,7 +34,7 @@ static void idct16_rows_dspr2(const int16_t *input, int16_t *output,
for (i = no_rows; i--; ) {
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
prefetch_load((const uint8_t *)(input + 16));
__asm__ __volatile__ (
"lh %[load1], 0(%[input]) \n\t"
@ -421,14 +421,14 @@ static void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
vp9_prefetch_load(vp9_ff_cropTbl);
vp9_prefetch_load(vp9_ff_cropTbl + 32);
vp9_prefetch_load(vp9_ff_cropTbl + 64);
vp9_prefetch_load(vp9_ff_cropTbl + 96);
vp9_prefetch_load(vp9_ff_cropTbl + 128);
vp9_prefetch_load(vp9_ff_cropTbl + 160);
vp9_prefetch_load(vp9_ff_cropTbl + 192);
vp9_prefetch_load(vp9_ff_cropTbl + 224);
prefetch_load(vp9_ff_cropTbl);
prefetch_load(vp9_ff_cropTbl + 32);
prefetch_load(vp9_ff_cropTbl + 64);
prefetch_load(vp9_ff_cropTbl + 96);
prefetch_load(vp9_ff_cropTbl + 128);
prefetch_load(vp9_ff_cropTbl + 160);
prefetch_load(vp9_ff_cropTbl + 192);
prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 16; ++i) {
dest_pix = (dest + i);
@ -1124,7 +1124,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 16; ++i) {
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
prefetch_load((const uint8_t *)(input + 16));
iadst16(input, outptr);
input += 16;
@ -1144,7 +1144,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
for (i = 0; i < 16; ++i) {
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
prefetch_load((const uint8_t *)(input + 16));
iadst16(input, outptr);
input += 16;

View File

@ -44,14 +44,14 @@ void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
vp9_prefetch_load(vp9_ff_cropTbl);
vp9_prefetch_load(vp9_ff_cropTbl + 32);
vp9_prefetch_load(vp9_ff_cropTbl + 64);
vp9_prefetch_load(vp9_ff_cropTbl + 96);
vp9_prefetch_load(vp9_ff_cropTbl + 128);
vp9_prefetch_load(vp9_ff_cropTbl + 160);
vp9_prefetch_load(vp9_ff_cropTbl + 192);
vp9_prefetch_load(vp9_ff_cropTbl + 224);
prefetch_load(vp9_ff_cropTbl);
prefetch_load(vp9_ff_cropTbl + 32);
prefetch_load(vp9_ff_cropTbl + 64);
prefetch_load(vp9_ff_cropTbl + 96);
prefetch_load(vp9_ff_cropTbl + 128);
prefetch_load(vp9_ff_cropTbl + 160);
prefetch_load(vp9_ff_cropTbl + 192);
prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 32; ++i) {
dest_pix = dest + i;

View File

@ -96,8 +96,8 @@ static void idct32_rows_dspr2(const int16_t *input, int16_t *output,
}
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 32));
vp9_prefetch_load((const uint8_t *)(input + 48));
prefetch_load((const uint8_t *)(input + 32));
prefetch_load((const uint8_t *)(input + 48));
__asm__ __volatile__ (
"lh %[load1], 2(%[input]) \n\t"

View File

@ -115,14 +115,14 @@ static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
vp9_prefetch_load(vp9_ff_cropTbl);
vp9_prefetch_load(vp9_ff_cropTbl + 32);
vp9_prefetch_load(vp9_ff_cropTbl + 64);
vp9_prefetch_load(vp9_ff_cropTbl + 96);
vp9_prefetch_load(vp9_ff_cropTbl + 128);
vp9_prefetch_load(vp9_ff_cropTbl + 160);
vp9_prefetch_load(vp9_ff_cropTbl + 192);
vp9_prefetch_load(vp9_ff_cropTbl + 224);
prefetch_load(vp9_ff_cropTbl);
prefetch_load(vp9_ff_cropTbl + 32);
prefetch_load(vp9_ff_cropTbl + 64);
prefetch_load(vp9_ff_cropTbl + 96);
prefetch_load(vp9_ff_cropTbl + 128);
prefetch_load(vp9_ff_cropTbl + 160);
prefetch_load(vp9_ff_cropTbl + 192);
prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 4; ++i) {
dest_pix = (dest + i);

View File

@ -211,14 +211,14 @@ static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
vp9_prefetch_load(vp9_ff_cropTbl);
vp9_prefetch_load(vp9_ff_cropTbl + 32);
vp9_prefetch_load(vp9_ff_cropTbl + 64);
vp9_prefetch_load(vp9_ff_cropTbl + 96);
vp9_prefetch_load(vp9_ff_cropTbl + 128);
vp9_prefetch_load(vp9_ff_cropTbl + 160);
vp9_prefetch_load(vp9_ff_cropTbl + 192);
vp9_prefetch_load(vp9_ff_cropTbl + 224);
prefetch_load(vp9_ff_cropTbl);
prefetch_load(vp9_ff_cropTbl + 32);
prefetch_load(vp9_ff_cropTbl + 64);
prefetch_load(vp9_ff_cropTbl + 96);
prefetch_load(vp9_ff_cropTbl + 128);
prefetch_load(vp9_ff_cropTbl + 160);
prefetch_load(vp9_ff_cropTbl + 192);
prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 8; ++i) {
dest_pix = (dest + i);

View File

@ -327,55 +327,55 @@ static void filter_selectively_vert_row2(int subsampling_factor,
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else if (mask_16x16_0 & 1) {
vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else {
vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_8x8_0 & 1) {
vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1);
} else {
vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_0 & 1) {
vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1);
} else {
vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) {
vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1);
} else {
vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
@ -427,55 +427,55 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor,
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
vp9_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else if (mask_16x16_0 & 1) {
vp9_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else {
vp9_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
vp9_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_8x8_0 & 1) {
vp9_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
vp9_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
vp9_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_0 & 1) {
vp9_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
vp9_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
vp9_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_int_0 & 1) {
vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
vp9_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
@ -513,11 +513,11 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
count = 2;
} else {
vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_8x8 & 1) {
@ -525,28 +525,28 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, lfin->mblim,
lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_4x4 & 1) {
@ -554,31 +554,31 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, lfin->mblim,
lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_4x4_int & 1) {
vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
}
@ -610,11 +610,11 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2, bd);
count = 2;
} else {
vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
} else if (mask_8x8 & 1) {
@ -622,31 +622,31 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
vp9_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
if ((mask_4x4_int & 3) == 3) {
vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr,
lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
} else {
if (mask_4x4_int & 1) {
vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
} else if (mask_4x4_int & 2) {
vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1, bd);
}
}
count = 2;
} else {
vp9_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
if (mask_4x4_int & 1) {
vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
}
}
@ -655,35 +655,35 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
vp9_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
if ((mask_4x4_int & 3) == 3) {
vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr,
lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
} else {
if (mask_4x4_int & 1) {
vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
} else if (mask_4x4_int & 2) {
vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1, bd);
}
}
count = 2;
} else {
vp9_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
if (mask_4x4_int & 1) {
vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
}
}
} else if (mask_4x4_int & 1) {
vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
}
@ -1094,15 +1094,15 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
if (mask & 1) {
if (mask_16x16 & 1) {
vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_8x8 & 1) {
vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
} else if (mask_4x4 & 1) {
vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
}
if (mask_4x4_int & 1)
vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
@ -1128,18 +1128,18 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
if (mask & 1) {
if (mask_16x16 & 1) {
vp9_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
} else if (mask_8x8 & 1) {
vp9_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
} else if (mask_4x4 & 1) {
vp9_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
}
if (mask_4x4_int & 1)
vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
s += 8;
lfl += 1;

View File

@ -8,12 +8,12 @@
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_lpf_horizontal_4_dual_neon|
EXPORT |vpx_lpf_horizontal_4_dual_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
;void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
; const uint8_t *blimit0,
; const uint8_t *limit0,
; const uint8_t *thresh0,
@ -29,7 +29,7 @@
; sp+8 const uint8_t *limit1,
; sp+12 const uint8_t *thresh1,
|vp9_lpf_horizontal_4_dual_neon| PROC
|vpx_lpf_horizontal_4_dual_neon| PROC
push {lr}
ldr r12, [sp, #4] ; load thresh0
@ -66,7 +66,7 @@
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
bl vp9_loop_filter_neon_16
bl vpx_loop_filter_neon_16
vst1.u8 {q5}, [r2@64], r1 ; store op1
vst1.u8 {q6}, [r3@64], r1 ; store op0
@ -76,9 +76,9 @@
vpop {d8-d15} ; restore neon registers
pop {pc}
ENDP ; |vp9_lpf_horizontal_4_dual_neon|
ENDP ; |vpx_lpf_horizontal_4_dual_neon|
; void vp9_loop_filter_neon_16();
; void vpx_loop_filter_neon_16();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store. This function uses
; registers d8-d15, so the calling function must save those registers.
@ -101,7 +101,7 @@
; q6 op0
; q7 oq0
; q8 oq1
|vp9_loop_filter_neon_16| PROC
|vpx_loop_filter_neon_16| PROC
; filter_mask
vabd.u8 q11, q3, q4 ; m1 = abs(p3 - p2)
@ -194,6 +194,6 @@
veor q8, q12, q10 ; *oq1 = u^0x80
bx lr
ENDP ; |vp9_loop_filter_neon_16|
ENDP ; |vpx_loop_filter_neon_16|
END

View File

@ -14,7 +14,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
static INLINE void vp9_loop_filter_neon_16(
static INLINE void loop_filter_neon_16(
uint8x16_t qblimit, // blimit
uint8x16_t qlimit, // limit
uint8x16_t qthresh, // thresh
@ -124,7 +124,7 @@ static INLINE void vp9_loop_filter_neon_16(
return;
}
void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@ -163,7 +163,7 @@ void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
s += p;
q10u8 = vld1q_u8(s);
vp9_loop_filter_neon_16(qblimit, qlimit, qthresh,
loop_filter_neon_16(qblimit, qlimit, qthresh,
q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8,
&q5u8, &q6u8, &q7u8, &q8u8);

View File

@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_lpf_horizontal_4_neon|
EXPORT |vp9_lpf_vertical_4_neon|
EXPORT |vpx_lpf_horizontal_4_neon|
EXPORT |vpx_lpf_vertical_4_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
; void vp9_lpf_horizontal_4_neon(uint8_t *s,
; void vpx_lpf_horizontal_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
@ -32,7 +32,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_lpf_horizontal_4_neon| PROC
|vpx_lpf_horizontal_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@ -41,7 +41,7 @@
add r1, r1, r1 ; double pitch
cmp r12, #0
beq end_vp9_lf_h_edge
beq end_vpx_lf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@ -62,7 +62,7 @@ count_lf_h_loop
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
bl vp9_loop_filter_neon
bl vpx_loop_filter_neon
vst1.u8 {d4}, [r2@64], r1 ; store op1
vst1.u8 {d5}, [r3@64], r1 ; store op0
@ -73,16 +73,16 @@ count_lf_h_loop
subs r12, r12, #1
bne count_lf_h_loop
end_vp9_lf_h_edge
end_vpx_lf_h_edge
pop {pc}
ENDP ; |vp9_lpf_horizontal_4_neon|
ENDP ; |vpx_lpf_horizontal_4_neon|
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
; void vp9_lpf_vertical_4_neon(uint8_t *s,
; void vpx_lpf_vertical_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
@ -95,7 +95,7 @@ end_vp9_lf_h_edge
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_lpf_vertical_4_neon| PROC
|vpx_lpf_vertical_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@ -105,7 +105,7 @@ end_vp9_lf_h_edge
ldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vp9_lf_v_edge
beq end_vpx_lf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
@ -135,7 +135,7 @@ count_lf_v_loop
vtrn.8 d7, d16
vtrn.8 d17, d18
bl vp9_loop_filter_neon
bl vpx_loop_filter_neon
sub r0, r0, #2
@ -154,11 +154,11 @@ count_lf_v_loop
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_lf_v_loop
end_vp9_lf_v_edge
end_vpx_lf_v_edge
pop {pc}
ENDP ; |vp9_lpf_vertical_4_neon|
ENDP ; |vpx_lpf_vertical_4_neon|
; void vp9_loop_filter_neon();
; void vpx_loop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
@ -182,7 +182,7 @@ end_vp9_lf_v_edge
; d5 op0
; d6 oq0
; d7 oq1
|vp9_loop_filter_neon| PROC
|vpx_loop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
@ -272,6 +272,6 @@ end_vp9_lf_v_edge
veor d7, d20, d18 ; *oq1 = u^0x80
bx lr
ENDP ; |vp9_loop_filter_neon|
ENDP ; |vpx_loop_filter_neon|
END

View File

@ -12,7 +12,7 @@
#include "./vpx_dsp_rtcd.h"
static INLINE void vp9_loop_filter_neon(
static INLINE void loop_filter_neon(
uint8x8_t dblimit, // flimit
uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh
@ -110,7 +110,7 @@ static INLINE void vp9_loop_filter_neon(
return;
}
void vp9_lpf_horizontal_4_neon(
void vpx_lpf_horizontal_4_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@ -122,7 +122,7 @@ void vp9_lpf_horizontal_4_neon(
uint8x8_t dblimit, dlimit, dthresh;
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
if (count == 0) // end_vp9_lf_h_edge
if (count == 0) // end_vpx_lf_h_edge
return;
dblimit = vld1_u8(blimit);
@ -149,7 +149,7 @@ void vp9_lpf_horizontal_4_neon(
s += pitch;
d18u8 = vld1_u8(s);
vp9_loop_filter_neon(dblimit, dlimit, dthresh,
loop_filter_neon(dblimit, dlimit, dthresh,
d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
&d4u8, &d5u8, &d6u8, &d7u8);
@ -165,7 +165,7 @@ void vp9_lpf_horizontal_4_neon(
return;
}
void vp9_lpf_vertical_4_neon(
void vpx_lpf_vertical_4_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@ -181,7 +181,7 @@ void vp9_lpf_vertical_4_neon(
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
uint8x8x4_t d4Result;
if (count == 0) // end_vp9_lf_h_edge
if (count == 0) // end_vpx_lf_h_edge
return;
dblimit = vld1_u8(blimit);
@ -244,7 +244,7 @@ void vp9_lpf_vertical_4_neon(
d17u8 = d2tmp11.val[0];
d18u8 = d2tmp11.val[1];
vp9_loop_filter_neon(dblimit, dlimit, dthresh,
loop_filter_neon(dblimit, dlimit, dthresh,
d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
&d4u8, &d5u8, &d6u8, &d7u8);

View File

@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_lpf_horizontal_8_neon|
EXPORT |vp9_lpf_vertical_8_neon|
EXPORT |vpx_lpf_horizontal_8_neon|
EXPORT |vpx_lpf_vertical_8_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p,
; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
@ -30,7 +30,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_lpf_horizontal_8_neon| PROC
|vpx_lpf_horizontal_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@ -39,7 +39,7 @@
add r1, r1, r1 ; double pitch
cmp r12, #0
beq end_vp9_mblf_h_edge
beq end_vpx_mblf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@ -60,7 +60,7 @@ count_mblf_h_loop
sub r3, r3, r1, lsl #1
sub r2, r2, r1, lsl #2
bl vp9_mbloop_filter_neon
bl vpx_mbloop_filter_neon
vst1.u8 {d0}, [r2@64], r1 ; store op2
vst1.u8 {d1}, [r3@64], r1 ; store op1
@ -73,12 +73,12 @@ count_mblf_h_loop
subs r12, r12, #1
bne count_mblf_h_loop
end_vp9_mblf_h_edge
end_vpx_mblf_h_edge
pop {r4-r5, pc}
ENDP ; |vp9_lpf_horizontal_8_neon|
ENDP ; |vpx_lpf_horizontal_8_neon|
; void vp9_lpf_vertical_8_neon(uint8_t *s,
; void vpx_lpf_vertical_8_neon(uint8_t *s,
; int pitch,
; const uint8_t *blimit,
; const uint8_t *limit,
@ -91,7 +91,7 @@ end_vp9_mblf_h_edge
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_lpf_vertical_8_neon| PROC
|vpx_lpf_vertical_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@ -101,7 +101,7 @@ end_vp9_mblf_h_edge
ldr r3, [sp, #12] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vp9_mblf_v_edge
beq end_vpx_mblf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
@ -134,7 +134,7 @@ count_mblf_v_loop
sub r2, r0, #3
add r3, r0, #1
bl vp9_mbloop_filter_neon
bl vpx_mbloop_filter_neon
;store op2, op1, op0, oq0
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
@ -161,11 +161,11 @@ count_mblf_v_loop
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_mblf_v_loop
end_vp9_mblf_v_edge
end_vpx_mblf_v_edge
pop {r4-r5, pc}
ENDP ; |vp9_lpf_vertical_8_neon|
ENDP ; |vpx_lpf_vertical_8_neon|
; void vp9_mbloop_filter_neon();
; void vpx_mbloop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
@ -191,7 +191,7 @@ end_vp9_mblf_v_edge
; d3 oq0
; d4 oq1
; d5 oq2
|vp9_mbloop_filter_neon| PROC
|vpx_mbloop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
@ -446,6 +446,6 @@ filter_branch_only
bx lr
ENDP ; |vp9_mbloop_filter_neon|
ENDP ; |vpx_mbloop_filter_neon|
END

View File

@ -12,7 +12,7 @@
#include "./vpx_dsp_rtcd.h"
static INLINE void vp9_mbloop_filter_neon(
static INLINE void mbloop_filter_neon(
uint8x8_t dblimit, // mblimit
uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh
@ -263,7 +263,7 @@ static INLINE void vp9_mbloop_filter_neon(
return;
}
void vp9_lpf_horizontal_8_neon(
void vpx_lpf_horizontal_8_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@ -276,7 +276,7 @@ void vp9_lpf_horizontal_8_neon(
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
uint8x8_t d16u8, d17u8, d18u8;
if (count == 0) // end_vp9_mblf_h_edge
if (count == 0) // end_vpx_mblf_h_edge
return;
dblimit = vld1_u8(blimit);
@ -303,7 +303,7 @@ void vp9_lpf_horizontal_8_neon(
s += pitch;
d18u8 = vld1_u8(s);
vp9_mbloop_filter_neon(dblimit, dlimit, dthresh,
mbloop_filter_neon(dblimit, dlimit, dthresh,
d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
&d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
@ -323,7 +323,7 @@ void vp9_lpf_horizontal_8_neon(
return;
}
void vp9_lpf_vertical_8_neon(
void vpx_lpf_vertical_8_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@ -403,7 +403,7 @@ void vp9_lpf_vertical_8_neon(
d17u8 = d2tmp11.val[0];
d18u8 = d2tmp11.val[1];
vp9_mbloop_filter_neon(dblimit, dlimit, dthresh,
mbloop_filter_neon(dblimit, dlimit, dthresh,
d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
&d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);

View File

@ -8,13 +8,13 @@
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_lpf_horizontal_16_neon|
EXPORT |vp9_lpf_vertical_16_neon|
EXPORT |vpx_lpf_horizontal_16_neon|
EXPORT |vpx_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p,
; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh
@ -24,7 +24,7 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
|vp9_lpf_horizontal_16_neon| PROC
|vpx_lpf_horizontal_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@ -54,7 +54,7 @@ h_count
vld1.u8 {d14}, [r8@64], r1 ; q6
vld1.u8 {d15}, [r8@64], r1 ; q7
bl vp9_wide_mbfilter_neon
bl vpx_wide_mbfilter_neon
tst r7, #1
beq h_mbfilter
@ -115,9 +115,9 @@ h_next
vpop {d8-d15}
pop {r4-r8, pc}
ENDP ; |vp9_lpf_horizontal_16_neon|
ENDP ; |vpx_lpf_horizontal_16_neon|
; void vp9_lpf_vertical_16_neon(uint8_t *s, int p,
; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh)
@ -126,7 +126,7 @@ h_next
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
|vp9_lpf_vertical_16_neon| PROC
|vpx_lpf_vertical_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@ -176,7 +176,7 @@ h_next
vtrn.8 d12, d13
vtrn.8 d14, d15
bl vp9_wide_mbfilter_neon
bl vpx_wide_mbfilter_neon
tst r7, #1
beq v_mbfilter
@ -279,9 +279,9 @@ v_end
vpop {d8-d15}
pop {r4-r8, pc}
ENDP ; |vp9_lpf_vertical_16_neon|
ENDP ; |vpx_lpf_vertical_16_neon|
; void vp9_wide_mbfilter_neon();
; void vpx_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store.
;
@ -305,7 +305,7 @@ v_end
; d13 q5
; d14 q6
; d15 q7
|vp9_wide_mbfilter_neon| PROC
|vpx_wide_mbfilter_neon| PROC
mov r7, #0
; filter_mask
@ -601,6 +601,6 @@ v_end
vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
bx lr
ENDP ; |vp9_wide_mbfilter_neon|
ENDP ; |vpx_wide_mbfilter_neon|
END

View File

@ -14,45 +14,45 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p,
void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
#if HAVE_NEON_ASM
void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
}
void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p,
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
}
#endif // HAVE_NEON_ASM

View File

@ -115,7 +115,7 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1,
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int count) {
int i;
@ -132,15 +132,15 @@ void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
}
}
void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@ -157,12 +157,12 @@ void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
}
}
void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@ -187,7 +187,7 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
}
}
void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@ -207,15 +207,15 @@ void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
}
}
void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@ -232,12 +232,12 @@ void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
}
}
void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@ -292,7 +292,7 @@ static INLINE void filter16(int8_t mask, uint8_t thresh,
}
}
void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@ -341,12 +341,12 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
}
}
void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
}
void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
}
@ -446,7 +446,7 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1,
*op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift);
}
void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int count, int bd) {
int i;
@ -469,7 +469,7 @@ void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
}
}
void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@ -477,11 +477,11 @@ void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
vp9_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
vp9_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
}
void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@ -498,7 +498,7 @@ void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
}
}
void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@ -506,8 +506,8 @@ void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
vp9_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
vp9_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1, bd);
}
@ -532,7 +532,7 @@ static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
}
}
void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@ -554,7 +554,7 @@ void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
}
}
void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@ -562,11 +562,11 @@ void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
vp9_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
vp9_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
}
void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@ -586,7 +586,7 @@ void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
}
}
void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@ -594,8 +594,8 @@ void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
vp9_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
vp9_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1, bd);
}
@ -662,7 +662,7 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh,
}
}
void vp9_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@ -727,13 +727,13 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
}
}
void vp9_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
}
void vp9_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,

View File

@ -21,7 +21,7 @@ extern "C" {
#if HAVE_DSPR2
#define CROP_WIDTH 512
static INLINE void vp9_prefetch_load(const unsigned char *src) {
static INLINE void prefetch_load(const unsigned char *src) {
__asm__ __volatile__ (
"pref 0, 0(%[src]) \n\t"
:
@ -30,7 +30,7 @@ static INLINE void vp9_prefetch_load(const unsigned char *src) {
}
/* prefetch data for store */
static INLINE void vp9_prefetch_store(unsigned char *dst) {
static INLINE void prefetch_store(unsigned char *dst) {
__asm__ __volatile__ (
"pref 1, 0(%[dst]) \n\t"
:
@ -38,7 +38,7 @@ static INLINE void vp9_prefetch_store(unsigned char *dst) {
);
}
static INLINE void vp9_prefetch_load_streamed(const unsigned char *src) {
static INLINE void prefetch_load_streamed(const unsigned char *src) {
__asm__ __volatile__ (
"pref 4, 0(%[src]) \n\t"
:
@ -47,7 +47,7 @@ static INLINE void vp9_prefetch_load_streamed(const unsigned char *src) {
}
/* prefetch data for store */
static INLINE void vp9_prefetch_store_streamed(unsigned char *dst) {
static INLINE void prefetch_store_streamed(unsigned char *dst) {
__asm__ __volatile__ (
"pref 5, 0(%[dst]) \n\t"
:

View File

@ -11,7 +11,7 @@
#include "vpx_ports/mem.h"
#include "vpx_dsp/mips/loopfilter_msa.h"
int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
uint8_t *filter48,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@ -79,7 +79,7 @@ int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
}
}
void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
v16u8 flat, flat2, filter8;
v16i8 zero = { 0 };
v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
@ -405,7 +405,7 @@ void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
}
}
void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@ -415,15 +415,15 @@ void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
(void)count;
early_exit = vp9_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
early_exit = vpx_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
vp9_hz_lpf_t16_16w(src, pitch, filter48);
vpx_hz_lpf_t16_16w(src, pitch, filter48);
}
}
void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@ -643,7 +643,7 @@ void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
}
}
} else {
vp9_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
thresh_ptr, count);
}
}
@ -744,7 +744,7 @@ static void transpose_16x16(uint8_t *input, int32_t in_pitch,
ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
}
int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
uint8_t *src_org, int32_t pitch_org,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@ -812,7 +812,7 @@ int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
}
}
int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
int32_t vpx_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
uint8_t *filter48) {
v16i8 zero = { 0 };
v16u8 filter8, flat, flat2;
@ -1032,7 +1032,7 @@ int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
}
}
void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr) {
@ -1042,12 +1042,12 @@ void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
early_exit = vp9_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
early_exit = vpx_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
&filter48[0], src, pitch, b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
early_exit = vp9_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
early_exit = vpx_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
&filter48[0]);
if (0 == early_exit) {
@ -1056,7 +1056,7 @@ void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
}
}
int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
int32_t vpx_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
uint8_t *src_org, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@ -1134,7 +1134,7 @@ int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
}
}
int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
int32_t vpx_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
uint8_t *filter48) {
v16u8 flat, flat2, filter8;
v16i8 zero = { 0 };
@ -1455,7 +1455,7 @@ int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
}
}
void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr) {
@ -1465,12 +1465,12 @@ void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
early_exit = vp9_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
early_exit = vpx_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
&filter48[0], src, pitch, b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
early_exit = vp9_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
early_exit = vpx_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
&filter48[0]);
if (0 == early_exit) {

View File

@ -10,7 +10,7 @@
#include "vpx_dsp/mips/loopfilter_msa.h"
void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@ -39,7 +39,7 @@ void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
}
void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
@ -71,7 +71,7 @@ void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
}
void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@ -102,7 +102,7 @@ void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
}
void vp9_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,

View File

@ -10,7 +10,7 @@
#include "vpx_dsp/mips/loopfilter_msa.h"
void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@ -83,7 +83,7 @@ void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
}
}
void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@ -158,7 +158,7 @@ void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
}
}
void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@ -237,7 +237,7 @@ void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
}
}
void vp9_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0,
const uint8_t *limit0,
const uint8_t *thresh0,

View File

@ -19,7 +19,7 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
void vpx_lpf_horizontal_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@ -49,7 +49,7 @@ void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
);
/* prefetch data for store */
vp9_prefetch_store(s);
prefetch_store(s);
/* loop filter designed to work using chars so that we can make maximum use
of 8 bit simd instructions. */
@ -87,14 +87,14 @@ void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
: [sm1] "r" (sm1), [s0] "r" (s0), [s5] "r" (s5), [s6] "r" (s6)
);
vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2,
filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2,
pm1, p0, p3, p4, p5, p6,
thresh_vec, &hev, &mask);
/* if mask == 0 do filtering is not needed */
if (mask) {
/* filtering */
vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
__asm__ __volatile__ (
"sw %[p1], (%[s1]) \n\t"
@ -113,7 +113,7 @@ void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
}
}
void vp9_lpf_vertical_4_dspr2(unsigned char *s,
void vpx_lpf_vertical_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@ -143,7 +143,7 @@ void vp9_lpf_vertical_4_dspr2(unsigned char *s,
);
/* prefetch data for store */
vp9_prefetch_store(s + pitch);
prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@ -216,14 +216,14 @@ void vp9_lpf_vertical_4_dspr2(unsigned char *s,
* mask will be zero and filtering is not needed
*/
if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1,
filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1,
p0, p3, p4, p5, p6, thresh_vec,
&hev, &mask);
/* if mask == 0 do filtering is not needed */
if (mask) {
/* filtering */
vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
/* unpack processed 4x4 neighborhood
* don't use transpose on output data
@ -306,56 +306,56 @@ void vp9_lpf_vertical_4_dspr2(unsigned char *s,
}
}
void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
1);
}
void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
vpx_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
vpx_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
}
#endif // #if HAVE_DSPR2

View File

@ -24,10 +24,10 @@ extern "C" {
#if HAVE_DSPR2
/* inputs & outputs are quad-byte vectors */
static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
static INLINE void filter_dspr2(uint32_t mask, uint32_t hev,
uint32_t *ps1, uint32_t *ps0,
uint32_t *qs0, uint32_t *qs1) {
int32_t vp9_filter_l, vp9_filter_r;
int32_t vpx_filter_l, vpx_filter_r;
int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
int32_t subr_r, subr_l;
uint32_t t1, t2, HWM, t3;
@ -73,34 +73,34 @@ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
hev_r = hev_r & HWM;
__asm__ __volatile__ (
/* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
"subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t"
"subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
"subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
"subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* qs0 - ps0 */
"subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
"subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
/* vp9_filter &= hev; */
"and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t"
"and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t"
/* vpx_filter &= hev; */
"and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
"and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"
/* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
"addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
/* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
"addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_l], %[hev_l], %[HWM] \n\t"
"addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
"addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_r], %[hev_r], %[HWM] \n\t"
"addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
"addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
/* vp9_filter &= mask; */
"and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t"
"and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t"
/* vpx_filter &= mask; */
"and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
"and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"
: [vp9_filter_l] "=&r" (vp9_filter_l),
[vp9_filter_r] "=&r" (vp9_filter_r),
: [vpx_filter_l] "=&r" (vpx_filter_l),
[vpx_filter_r] "=&r" (vpx_filter_r),
[subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
[invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
: [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
@ -113,13 +113,13 @@ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
/* save bottom 3 bits so that we round one side +4 and the other +3 */
__asm__ __volatile__ (
/* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */
"addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t"
"addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t"
/* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */
"addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
"addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"
/* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */
"addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t"
"addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t"
/* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */
"addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
"addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
"shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
"shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
@ -142,23 +142,23 @@ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
[vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
[vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
: [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
[vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
[vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
);
__asm__ __volatile__ (
/* (vp9_filter += 1) >>= 1 */
/* (vpx_filter += 1) >>= 1 */
"addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
"addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
/* vp9_filter &= ~hev; */
/* vpx_filter &= ~hev; */
"and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
"and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
/* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
/* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
"addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
"addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
/* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
/* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
"subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
"subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
@ -196,12 +196,12 @@ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
*qs1 = vqs1 ^ N128;
}
static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev,
uint32_t ps1, uint32_t ps0,
uint32_t qs0, uint32_t qs1,
uint32_t *p1_f0, uint32_t *p0_f0,
uint32_t *q0_f0, uint32_t *q1_f0) {
int32_t vp9_filter_l, vp9_filter_r;
int32_t vpx_filter_l, vpx_filter_r;
int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
int32_t subr_r, subr_l;
uint32_t t1, t2, HWM, t3;
@ -247,34 +247,34 @@ static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
hev_r = hev_r & HWM;
__asm__ __volatile__ (
/* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
"subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t"
"subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
"subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
"subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* qs0 - ps0 */
"subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
"subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
/* vp9_filter &= hev; */
"and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t"
"and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t"
/* vpx_filter &= hev; */
"and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
"and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"
/* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
"addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
/* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
"addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_l], %[hev_l], %[HWM] \n\t"
"addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
"addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_r], %[hev_r], %[HWM] \n\t"
"addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
"addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
"addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
/* vp9_filter &= mask; */
"and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t"
"and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t"
/* vpx_filter &= mask; */
"and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
"and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"
: [vp9_filter_l] "=&r" (vp9_filter_l),
[vp9_filter_r] "=&r" (vp9_filter_r),
: [vpx_filter_l] "=&r" (vpx_filter_l),
[vpx_filter_r] "=&r" (vpx_filter_r),
[subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
[invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
: [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
@ -286,13 +286,13 @@ static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
/* save bottom 3 bits so that we round one side +4 and the other +3 */
__asm__ __volatile__ (
/* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */
"addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t"
"addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t"
/* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */
"addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
"addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"
/* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */
"addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t"
"addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t"
/* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */
"addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
"addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
"shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
"shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
@ -315,23 +315,23 @@ static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
[vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
[vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
: [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
[vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
[vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
);
__asm__ __volatile__ (
/* (vp9_filter += 1) >>= 1 */
/* (vpx_filter += 1) >>= 1 */
"addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
"addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
/* vp9_filter &= ~hev; */
/* vpx_filter &= ~hev; */
"and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
"and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
/* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
/* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
"addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
"addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
/* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
/* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
"subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
"subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
@ -369,7 +369,7 @@ static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
*q1_f0 = vqs1 ^ N128;
}
static INLINE void vp9_mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
uint32_t *op1, uint32_t *op0,
uint32_t *oq0, uint32_t *oq1,
uint32_t *oq2, uint32_t *oq3) {
@ -446,7 +446,7 @@ static INLINE void vp9_mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
*oq2 = res_oq2;
}
static INLINE void vp9_mbfilter1_dspr2(uint32_t p3, uint32_t p2,
static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2,
uint32_t p1, uint32_t p0,
uint32_t q0, uint32_t q1,
uint32_t q2, uint32_t q3,
@ -524,7 +524,7 @@ static INLINE void vp9_mbfilter1_dspr2(uint32_t p3, uint32_t p2,
*oq2_f1 = res_oq2;
}
static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
static INLINE void wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
uint32_t *op5, uint32_t *op4,
uint32_t *op3, uint32_t *op2,
uint32_t *op1, uint32_t *op0,

View File

@ -24,7 +24,7 @@ extern "C" {
#if HAVE_DSPR2
/* processing 4 pixels at the same time
* compute hev and mask in the same function */
static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
uint32_t p1, uint32_t p0,
uint32_t p3, uint32_t p2,
uint32_t q0, uint32_t q1,
@ -129,7 +129,7 @@ static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
*mask = s2;
}
static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit,
static INLINE void filter_hev_mask_flatmask4_dspr2(uint32_t limit,
uint32_t flimit,
uint32_t thresh,
uint32_t p1, uint32_t p0,
@ -279,7 +279,7 @@ static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit,
*flat = flat1;
}
static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3,
static INLINE void flatmask5(uint32_t p4, uint32_t p3,
uint32_t p2, uint32_t p1,
uint32_t p0, uint32_t q0,
uint32_t q1, uint32_t q2,

View File

@ -19,7 +19,7 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
void vpx_lpf_horizontal_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@ -52,7 +52,7 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
);
/* prefetch data for store */
vp9_prefetch_store(s);
prefetch_store(s);
for (i = 0; i < 2; i++) {
sp3 = s - (pitch << 2);
@ -80,12 +80,12 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
[sq3] "r" (sq3), [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0)
);
vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
p1, p0, p3, p2, q0, q1, q2, q3,
&hev, &mask, &flat);
if ((flat == 0) && (mask != 0)) {
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
__asm__ __volatile__ (
@ -103,12 +103,12 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
} else if ((mask & flat) == 0xFFFFFFFF) {
/* left 2 element operation */
PACK_LEFT_0TO3()
vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r);
COMBINE_LEFT_RIGHT_0TO2()
@ -129,17 +129,17 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
);
} else if ((flat != 0) && (mask != 0)) {
/* filtering */
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
@ -318,7 +318,7 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
}
}
void vp9_lpf_vertical_8_dspr2(unsigned char *s,
void vpx_lpf_vertical_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@ -350,7 +350,7 @@ void vp9_lpf_vertical_8_dspr2(unsigned char *s,
: [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
);
vp9_prefetch_store(s + pitch);
prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@ -450,38 +450,38 @@ void vp9_lpf_vertical_8_dspr2(unsigned char *s,
:
);
vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
p1, p0, p3, p2, q0, q1, q2, q3,
&hev, &mask, &flat);
if ((flat == 0) && (mask != 0)) {
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
STORE_F0()
} else if ((mask & flat) == 0xFFFFFFFF) {
/* left 2 element operation */
PACK_LEFT_0TO3()
vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r);
STORE_F1()
} else if ((flat != 0) && (mask != 0)) {
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {

View File

@ -19,7 +19,7 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@ -57,7 +57,7 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
);
/* prefetch data for store */
vp9_prefetch_store(s);
prefetch_store(s);
for (i = 0; i < (2 * count); i++) {
sp7 = s - (pitch << 3);
@ -109,16 +109,16 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
[sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6), [sq7] "r" (sq7)
);
vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
p1, p0, p3, p2, q0, q1, q2, q3,
&hev, &mask, &flat);
vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
/* f0 */
if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
((flat2 != 0) && (flat == 0) && (mask != 0))) {
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
__asm__ __volatile__ (
@ -138,14 +138,14 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
/* f2 */
PACK_LEFT_0TO3()
PACK_LEFT_4TO7()
vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l,
&q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_0TO3()
PACK_RIGHT_4TO7()
vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r,
&q4_r, &q5_r, &q6_r, &q7_r);
@ -188,12 +188,12 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
/* f1 */
/* left 2 element operation */
PACK_LEFT_0TO3()
vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r);
COMBINE_LEFT_RIGHT_0TO2()
@ -214,17 +214,17 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
);
} else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
/* f0+f1 */
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
@ -398,33 +398,33 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
} else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
/* f0 + f1 + f2 */
/* f0 function */
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* f1 function */
/* left 2 element operation */
PACK_LEFT_0TO3()
vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
q0_l, q1_l, q2_l, q3_l,
&p2_l_f1, &p1_l_f1, &p0_l_f1,
&q0_l_f1, &q1_l_f1, &q2_l_f1);
/* right 2 element operation */
PACK_RIGHT_0TO3()
vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
q0_r, q1_r, q2_r, q3_r,
&p2_r_f1, &p1_r_f1, &p0_r_f1,
&q0_r_f1, &q1_r_f1, &q2_r_f1);
/* f2 function */
PACK_LEFT_4TO7()
vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l,
&q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_4TO7()
vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r,
&q4_r, &q5_r, &q6_r, &q7_r);

View File

@ -19,7 +19,7 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
void vp9_lpf_vertical_16_dspr2(uint8_t *s,
void vpx_lpf_vertical_16_dspr2(uint8_t *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@ -54,7 +54,7 @@ void vp9_lpf_vertical_16_dspr2(uint8_t *s,
: [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
);
vp9_prefetch_store(s + pitch);
prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@ -247,16 +247,16 @@ void vp9_lpf_vertical_16_dspr2(uint8_t *s,
:
);
vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
p1, p0, p3, p2, q0, q1, q2, q3,
&hev, &mask, &flat);
vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
/* f0 */
if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
((flat2 != 0) && (flat == 0) && (mask != 0))) {
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
STORE_F0()
} else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) &&
@ -264,14 +264,14 @@ void vp9_lpf_vertical_16_dspr2(uint8_t *s,
/* f2 */
PACK_LEFT_0TO3()
PACK_LEFT_4TO7()
vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l,
&q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_0TO3()
PACK_RIGHT_4TO7()
vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r,
&q4_r, &q5_r, &q6_r, &q7_r);
@ -280,27 +280,27 @@ void vp9_lpf_vertical_16_dspr2(uint8_t *s,
} else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
/* f1 */
PACK_LEFT_0TO3()
vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l);
PACK_RIGHT_0TO3()
vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r);
STORE_F1()
} else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
/* f0 + f1 */
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
@ -465,29 +465,29 @@ void vp9_lpf_vertical_16_dspr2(uint8_t *s,
}
} else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
/* f0+f1+f2 */
vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
filter1_dspr2(mask, hev, p1, p0, q0, q1,
&p1_f0, &p0_f0, &q0_f0, &q1_f0);
PACK_LEFT_0TO3()
vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
q0_l, q1_l, q2_l, q3_l,
&p2_l_f1, &p1_l_f1, &p0_l_f1,
&q0_l_f1, &q1_l_f1, &q2_l_f1);
PACK_RIGHT_0TO3()
vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
q0_r, q1_r, q2_r, q3_r,
&p2_r_f1, &p1_r_f1, &p0_r_f1,
&q0_r_f1, &q1_r_f1, &q2_r_f1);
PACK_LEFT_4TO7()
vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
&p3_l, &p2_l, &p1_l, &p0_l,
&q0_l, &q1_l, &q2_l, &q3_l,
&q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_4TO7()
vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
&p3_r, &p2_r, &p1_r, &p0_r,
&q0_r, &q1_r, &q2_r, &q3_r,
&q4_r, &q5_r, &q6_r, &q7_r);

View File

@ -38,77 +38,77 @@ if ($opts{arch} eq "x86_64") {
#
# Loopfilter
#
add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vp9_lpf_vertical_16 sse2 neon_asm msa/;
$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_16 sse2 neon_asm msa/;
$vpx_lpf_vertical_16_neon_asm=vpx_lpf_vertical_16_neon;
add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm msa/;
$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm msa/;
$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_vertical_8 sse2 neon msa/;
add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vpx_lpf_vertical_8 sse2 neon msa/;
add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm msa/;
$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm msa/;
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_vertical_4 mmx neon msa/;
add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vpx_lpf_vertical_4 mmx neon msa/;
add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_vertical_4_dual sse2 neon msa/;
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_vertical_4_dual sse2 neon msa/;
add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm msa/;
$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm msa/;
$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_8 sse2 neon msa/;
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vpx_lpf_horizontal_8 sse2 neon msa/;
add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm msa/;
$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm msa/;
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_4 mmx neon msa/;
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vpx_lpf_horizontal_4 mmx neon msa/;
add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_horizontal_4_dual sse2 neon msa/;
add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_horizontal_4_dual sse2 neon msa/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vp9_highbd_lpf_vertical_16 sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_vertical_16 sse2/;
add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vp9_highbd_lpf_vertical_8 sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vp9_highbd_lpf_vertical_4 sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vp9_highbd_lpf_horizontal_16 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vp9_highbd_lpf_horizontal_8 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vp9_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2/;
} # CONFIG_VP9_HIGHBITDEPTH
if (vpx_config("CONFIG_ENCODERS") eq "yes") {

View File

@ -508,7 +508,7 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s,
}
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
void vp9_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@ -519,7 +519,7 @@ void vp9_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd);
}
void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@ -687,7 +687,7 @@ void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
// (vp9_filter + 3 * (qs0 - ps0)) & mask
// (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = signed_char_clamp_bd_sse2(filt, bd);
filt = _mm_and_si128(filt, mask);
@ -756,7 +756,7 @@ void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
_mm_store_si128((__m128i *)(s + 2 * p), q2);
}
void vp9_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@ -764,12 +764,12 @@ void vp9_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
vp9_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
vp9_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
1, bd);
}
void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@ -891,7 +891,7 @@ void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
filt = _mm_adds_epi16(filt, work_a);
filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd);
// (vp9_filter + 3 * (qs0 - ps0)) & mask
// (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd);
@ -936,7 +936,7 @@ void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
_mm_storeu_si128((__m128i *)(s + 1 * p), q1);
}
void vp9_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@ -944,8 +944,8 @@ void vp9_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
vp9_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
vp9_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
bd);
}
@ -1054,7 +1054,7 @@ static INLINE void highbd_transpose8x16(uint16_t *in0, uint16_t *in1,
highbd_transpose(src1, in_p, dest1, out_p, 1);
}
void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@ -1071,7 +1071,7 @@ void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
vp9_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
bd);
src[0] = t_dst;
@ -1081,7 +1081,7 @@ void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
highbd_transpose(src, 8, dst, p, 1);
}
void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@ -1097,7 +1097,7 @@ void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
vp9_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
vpx_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
thresh0, blimit1, limit1, thresh1, bd);
src[0] = t_dst;
src[1] = t_dst + 8;
@ -1108,7 +1108,7 @@ void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
highbd_transpose(src, 16, dst, p, 2);
}
void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@ -1125,7 +1125,7 @@ void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
vp9_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
bd);
src[0] = t_dst;
@ -1135,7 +1135,7 @@ void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
highbd_transpose(src, 8, dst, p, 1);
}
void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@ -1151,7 +1151,7 @@ void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
vp9_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
vpx_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
thresh0, blimit1, limit1, thresh1, bd);
src[0] = t_dst;
src[1] = t_dst + 8;
@ -1163,7 +1163,7 @@ void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
highbd_transpose(src, 16, dst, p, 2);
}
void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@ -1192,7 +1192,7 @@ void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
highbd_transpose(src, 8, dst, p, 2);
}
void vp9_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
int p,
const uint8_t *blimit,
const uint8_t *limit,

View File

@ -103,7 +103,7 @@ static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
/* (vp9_filter + 3 * (qs0 - ps0)) & mask */
/* (vpx_filter + 3 * (qs0 - ps0)) & mask */
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@ -515,7 +515,7 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
/* (vp9_filter + 3 * (qs0 - ps0)) & mask */
/* (vpx_filter + 3 * (qs0 - ps0)) & mask */
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@ -976,7 +976,7 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
}
}
void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p,
void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
const unsigned char *_blimit, const unsigned char *_limit,
const unsigned char *_thresh, int count) {
if (count == 1)

View File

@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp9_lpf_horizontal_4_mmx
;void vpx_lpf_horizontal_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@ -21,8 +21,8 @@
; const char *thresh,
; int count
;)
global sym(vp9_lpf_horizontal_4_mmx) PRIVATE
sym(vp9_lpf_horizontal_4_mmx):
global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
sym(vpx_lpf_horizontal_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@ -224,7 +224,7 @@ sym(vp9_lpf_horizontal_4_mmx):
ret
;void vp9_lpf_vertical_4_mmx
;void vpx_lpf_vertical_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@ -233,8 +233,8 @@ sym(vp9_lpf_horizontal_4_mmx):
; const char *thresh,
; int count
;)
global sym(vp9_lpf_vertical_4_mmx) PRIVATE
sym(vp9_lpf_vertical_4_mmx):
global sym(vpx_lpf_vertical_4_mmx) PRIVATE
sym(vpx_lpf_vertical_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6

View File

@ -100,7 +100,7 @@ static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
// (vp9_filter + 3 * (qs0 - ps0)) & mask
// (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@ -495,7 +495,7 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
// (vp9_filter + 3 * (qs0 - ps0)) & mask
// (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
filter2 = _mm_adds_epi8(filt, t3);
@ -717,7 +717,7 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
}
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p,
void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
@ -727,7 +727,7 @@ void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p,
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
}
void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
@ -874,7 +874,7 @@ void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
// (vp9_filter + 3 * (qs0 - ps0)) & mask
// (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@ -943,7 +943,7 @@ void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
}
}
void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@ -1115,7 +1115,7 @@ void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
// (vp9_filter + 3 * (qs0 - ps0)) & mask
// (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@ -1190,7 +1190,7 @@ void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
}
}
void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
const unsigned char *_blimit0,
const unsigned char *_limit0,
const unsigned char *_thresh0,
@ -1286,7 +1286,7 @@ void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
// (vp9_filter + 3 * (qs0 - ps0)) & mask
// (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@ -1464,7 +1464,7 @@ static INLINE void transpose(unsigned char *src[], int in_p,
} while (++idx8x8 < num_8x8_to_transpose);
}
void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
@ -1478,7 +1478,7 @@ void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@ -1489,7 +1489,7 @@ void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
transpose(src, 16, dst, p, 2);
}
void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh, int count) {
@ -1505,7 +1505,7 @@ void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
transpose(src, p, dst, 8, 1);
// Loop filtering
vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
src[0] = t_dst;
dst[0] = s - 4;
@ -1514,7 +1514,7 @@ void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
transpose(src, 8, dst, p, 1);
}
void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
@ -1528,7 +1528,7 @@ void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@ -1540,7 +1540,7 @@ void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
transpose(src, 16, dst, p, 2);
}
void vp9_lpf_vertical_16_sse2(unsigned char *s, int p,
void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh) {
@ -1568,7 +1568,7 @@ void vp9_lpf_vertical_16_sse2(unsigned char *s, int p,
transpose(src, 8, dst, p, 2);
}
void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh) {
DECLARE_ALIGNED(16, unsigned char, t_dst[256]);