Unify loopfilter function names
Rename vpx_lpf_horizontal_edge_8() to vpx_lpf_horizontal_16(), and rename vpx_lpf_horizontal_edge_16() to vpx_lpf_horizontal_16_dual().

Change-Id: I798ca8fbbd657d06d3db2bfb0fb3321168f49e52
This commit is contained in:
@@ -402,10 +402,10 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
&vpx_highbd_lpf_vertical_4_c, 8),
|
&vpx_highbd_lpf_vertical_4_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_8_c, 8),
|
&vpx_highbd_lpf_horizontal_8_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_edge_8_c, 8),
|
&vpx_highbd_lpf_horizontal_16_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_edge_16_c, 8),
|
&vpx_highbd_lpf_horizontal_16_dual_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
||||||
&vpx_highbd_lpf_vertical_8_c, 8),
|
&vpx_highbd_lpf_vertical_8_c, 8),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
||||||
@@ -416,10 +416,10 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
&vpx_highbd_lpf_vertical_4_c, 10),
|
&vpx_highbd_lpf_vertical_4_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_8_c, 10),
|
&vpx_highbd_lpf_horizontal_8_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_edge_8_c, 10),
|
&vpx_highbd_lpf_horizontal_16_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_edge_16_c, 10),
|
&vpx_highbd_lpf_horizontal_16_dual_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
||||||
&vpx_highbd_lpf_vertical_8_c, 10),
|
&vpx_highbd_lpf_vertical_8_c, 10),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
||||||
@@ -430,10 +430,10 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
&vpx_highbd_lpf_vertical_4_c, 12),
|
&vpx_highbd_lpf_vertical_4_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_8_c, 12),
|
&vpx_highbd_lpf_horizontal_8_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_edge_8_c, 12),
|
&vpx_highbd_lpf_horizontal_16_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
|
make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
|
||||||
&vpx_highbd_lpf_horizontal_edge_16_c, 12),
|
&vpx_highbd_lpf_horizontal_16_dual_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
|
||||||
&vpx_highbd_lpf_vertical_8_c, 12),
|
&vpx_highbd_lpf_vertical_8_c, 12),
|
||||||
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
|
||||||
@@ -450,10 +450,9 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8),
|
make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8),
|
make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
|
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8),
|
||||||
&vpx_lpf_horizontal_edge_8_c, 8),
|
make_tuple(&vpx_lpf_horizontal_16_dual_sse2,
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
|
&vpx_lpf_horizontal_16_dual_c, 8),
|
||||||
&vpx_lpf_horizontal_edge_16_c, 8),
|
|
||||||
make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8),
|
make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8),
|
make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8),
|
make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8),
|
||||||
@@ -465,10 +464,10 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
|
#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
AVX2, Loop8Test6Param,
|
AVX2, Loop8Test6Param,
|
||||||
::testing::Values(make_tuple(&vpx_lpf_horizontal_edge_8_avx2,
|
::testing::Values(make_tuple(&vpx_lpf_horizontal_16_avx2,
|
||||||
&vpx_lpf_horizontal_edge_8_c, 8),
|
&vpx_lpf_horizontal_16_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_16_avx2,
|
make_tuple(&vpx_lpf_horizontal_16_dual_avx2,
|
||||||
&vpx_lpf_horizontal_edge_16_c, 8)));
|
&vpx_lpf_horizontal_16_dual_c, 8)));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_SSE2
|
#if HAVE_SSE2
|
||||||
@@ -520,10 +519,9 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
NEON, Loop8Test6Param,
|
NEON, Loop8Test6Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_8_neon,
|
make_tuple(&vpx_lpf_horizontal_16_neon, &vpx_lpf_horizontal_16_c, 8),
|
||||||
&vpx_lpf_horizontal_edge_8_c, 8),
|
make_tuple(&vpx_lpf_horizontal_16_dual_neon,
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_16_neon,
|
&vpx_lpf_horizontal_16_dual_c, 8),
|
||||||
&vpx_lpf_horizontal_edge_16_c, 8),
|
|
||||||
make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8),
|
make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c,
|
make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c,
|
||||||
8),
|
8),
|
||||||
@@ -550,8 +548,9 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8),
|
make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8),
|
make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_8, &vpx_lpf_horizontal_edge_8, 8),
|
make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_16, &vpx_lpf_horizontal_edge_16, 8),
|
make_tuple(&vpx_lpf_horizontal_16_dual_dspr2,
|
||||||
|
&vpx_lpf_horizontal_16_dual_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8),
|
make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8),
|
make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8),
|
make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8),
|
||||||
@@ -576,10 +575,9 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8),
|
make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8),
|
make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_8_msa, &vpx_lpf_horizontal_edge_8_c,
|
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8),
|
||||||
8),
|
make_tuple(&vpx_lpf_horizontal_16_dual_msa,
|
||||||
make_tuple(&vpx_lpf_horizontal_edge_16_msa,
|
&vpx_lpf_horizontal_16_dual_c, 8),
|
||||||
&vpx_lpf_horizontal_edge_16_c, 8),
|
|
||||||
make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8),
|
make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8),
|
make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8)));
|
make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8)));
|
||||||
|
|||||||
@@ -465,12 +465,11 @@ static void filter_selectively_horiz(
|
|||||||
|
|
||||||
if (mask_16x16 & 1) {
|
if (mask_16x16 & 1) {
|
||||||
if ((mask_16x16 & 3) == 3) {
|
if ((mask_16x16 & 3) == 3) {
|
||||||
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr);
|
lfi->hev_thr);
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
lfi->hev_thr);
|
|
||||||
}
|
}
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
if ((mask_8x8 & 3) == 3) {
|
if ((mask_8x8 & 3) == 3) {
|
||||||
@@ -559,12 +558,12 @@ static void highbd_filter_selectively_horiz(
|
|||||||
|
|
||||||
if (mask_16x16 & 1) {
|
if (mask_16x16 & 1) {
|
||||||
if ((mask_16x16 & 3) == 3) {
|
if ((mask_16x16 & 3) == 3) {
|
||||||
vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, bd);
|
lfi->hev_thr, bd);
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
|
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, bd);
|
lfi->hev_thr, bd);
|
||||||
}
|
}
|
||||||
} else if (mask_8x8 & 1) {
|
} else if (mask_8x8 & 1) {
|
||||||
if ((mask_8x8 & 3) == 3) {
|
if ((mask_8x8 & 3) == 3) {
|
||||||
|
|||||||
@@ -8,8 +8,8 @@
|
|||||||
; be found in the AUTHORS file in the root of the source tree.
|
; be found in the AUTHORS file in the root of the source tree.
|
||||||
;
|
;
|
||||||
|
|
||||||
EXPORT |vpx_lpf_horizontal_edge_8_neon|
|
EXPORT |vpx_lpf_horizontal_16_neon|
|
||||||
EXPORT |vpx_lpf_horizontal_edge_16_neon|
|
EXPORT |vpx_lpf_horizontal_16_dual_neon|
|
||||||
EXPORT |vpx_lpf_vertical_16_neon|
|
EXPORT |vpx_lpf_vertical_16_neon|
|
||||||
EXPORT |vpx_lpf_vertical_16_dual_neon|
|
EXPORT |vpx_lpf_vertical_16_dual_neon|
|
||||||
ARM
|
ARM
|
||||||
@@ -119,7 +119,7 @@ h_next
|
|||||||
|
|
||||||
ENDP ; |mb_lpf_horizontal_edge|
|
ENDP ; |mb_lpf_horizontal_edge|
|
||||||
|
|
||||||
; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
|
; void vpx_lpf_horizontal_16_neon(uint8_t *s, int pitch,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
; const uint8_t *limit,
|
; const uint8_t *limit,
|
||||||
; const uint8_t *thresh)
|
; const uint8_t *thresh)
|
||||||
@@ -128,12 +128,12 @@ h_next
|
|||||||
; r2 const uint8_t *blimit,
|
; r2 const uint8_t *blimit,
|
||||||
; r3 const uint8_t *limit,
|
; r3 const uint8_t *limit,
|
||||||
; sp const uint8_t *thresh
|
; sp const uint8_t *thresh
|
||||||
|vpx_lpf_horizontal_edge_8_neon| PROC
|
|vpx_lpf_horizontal_16_neon| PROC
|
||||||
mov r12, #1
|
mov r12, #1
|
||||||
b mb_lpf_horizontal_edge
|
b mb_lpf_horizontal_edge
|
||||||
ENDP ; |vpx_lpf_horizontal_edge_8_neon|
|
ENDP ; |vpx_lpf_horizontal_16_neon|
|
||||||
|
|
||||||
; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
|
; void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int pitch,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
; const uint8_t *limit,
|
; const uint8_t *limit,
|
||||||
; const uint8_t *thresh)
|
; const uint8_t *thresh)
|
||||||
@@ -142,10 +142,10 @@ h_next
|
|||||||
; r2 const uint8_t *blimit,
|
; r2 const uint8_t *blimit,
|
||||||
; r3 const uint8_t *limit,
|
; r3 const uint8_t *limit,
|
||||||
; sp const uint8_t *thresh
|
; sp const uint8_t *thresh
|
||||||
|vpx_lpf_horizontal_edge_16_neon| PROC
|
|vpx_lpf_horizontal_16_dual_neon| PROC
|
||||||
mov r12, #2
|
mov r12, #2
|
||||||
b mb_lpf_horizontal_edge
|
b mb_lpf_horizontal_edge
|
||||||
ENDP ; |vpx_lpf_horizontal_edge_16_neon|
|
ENDP ; |vpx_lpf_horizontal_16_dual_neon|
|
||||||
|
|
||||||
; void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
; void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
; const uint8_t *limit, const uint8_t *thresh,
|
; const uint8_t *limit, const uint8_t *thresh,
|
||||||
|
|||||||
@@ -975,9 +975,8 @@ FUN_LPF_16_KERNEL(_, 8) // lpf_16_kernel
|
|||||||
FUN_LPF_16_KERNEL(_dual_, 16) // lpf_16_dual_kernel
|
FUN_LPF_16_KERNEL(_dual_, 16) // lpf_16_dual_kernel
|
||||||
#undef FUN_LPF_16_KERNEL
|
#undef FUN_LPF_16_KERNEL
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int p, const uint8_t *blimit,
|
void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
const uint8_t *thresh) {
|
|
||||||
uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6,
|
uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6,
|
||||||
op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6;
|
op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6;
|
||||||
uint32_t flat_status, flat2_status;
|
uint32_t flat_status, flat2_status;
|
||||||
@@ -992,7 +991,7 @@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int p, const uint8_t *blimit,
|
|||||||
oq5, oq6, flat_status, flat2_status);
|
oq5, oq6, flat_status, flat2_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int p, const uint8_t *blimit,
|
void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh) {
|
const uint8_t *thresh) {
|
||||||
uint8x16_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7,
|
uint8x16_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7,
|
||||||
|
|||||||
@@ -308,12 +308,12 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
|
void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh) {
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
|
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
void vpx_lpf_horizontal_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh) {
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
|
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
|
||||||
}
|
}
|
||||||
@@ -673,14 +673,13 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p,
|
void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *blimit,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
const uint8_t *limit,
|
int bd) {
|
||||||
const uint8_t *thresh, int bd) {
|
|
||||||
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
|
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
|
void vpx_highbd_lpf_horizontal_16_dual_c(uint16_t *s, int p,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh, int bd) {
|
const uint8_t *thresh, int bd) {
|
||||||
|
|||||||
@@ -403,10 +403,11 @@ void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
|
static void mb_lpf_horizontal_edge_dual(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr, int32_t count) {
|
const uint8_t *thresh_ptr,
|
||||||
|
int32_t count) {
|
||||||
DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]);
|
DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]);
|
||||||
uint8_t early_exit = 0;
|
uint8_t early_exit = 0;
|
||||||
|
|
||||||
@@ -638,19 +639,19 @@ static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
|
mb_lpf_horizontal_edge_dual(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr,
|
||||||
thresh_ptr, count);
|
count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch,
|
void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr) {
|
const uint8_t *thresh_ptr) {
|
||||||
mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
|
mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch,
|
void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr) {
|
const uint8_t *thresh_ptr) {
|
||||||
|
|||||||
@@ -717,14 +717,13 @@ static void mb_lpf_horizontal_edge(unsigned char *s, int pitch,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch,
|
void vpx_lpf_horizontal_16_dspr2(unsigned char *s, int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit, const uint8_t *limit,
|
||||||
const uint8_t *limit,
|
const uint8_t *thresh) {
|
||||||
const uint8_t *thresh) {
|
|
||||||
mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
|
mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch,
|
void vpx_lpf_horizontal_16_dual_dspr2(unsigned char *s, int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh) {
|
const uint8_t *thresh) {
|
||||||
|
|||||||
@@ -522,11 +522,11 @@ specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/;
|
|||||||
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||||
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
add_proto qw/void vpx_lpf_horizontal_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_16_dual sse2 avx2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
|
||||||
@@ -559,11 +559,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
|
specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/;
|
specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
add_proto qw/void vpx_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/;
|
specialize qw/vpx_highbd_lpf_horizontal_16_dual sse2/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
|
||||||
specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
|
specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
|
||||||
|
|||||||
@@ -48,10 +48,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
|
|||||||
|
|
||||||
// TODO(debargha, peter): Break up large functions into smaller ones
|
// TODO(debargha, peter): Break up large functions into smaller ones
|
||||||
// in this file.
|
// in this file.
|
||||||
void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
|
void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
|
||||||
const uint8_t *_blimit,
|
const uint8_t *_blimit,
|
||||||
const uint8_t *_limit,
|
const uint8_t *_limit,
|
||||||
const uint8_t *_thresh, int bd) {
|
const uint8_t *_thresh, int bd) {
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i one = _mm_set1_epi16(1);
|
const __m128i one = _mm_set1_epi16(1);
|
||||||
__m128i blimit, limit, thresh;
|
__m128i blimit, limit, thresh;
|
||||||
@@ -475,12 +475,12 @@ void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
|
|||||||
_mm_store_si128((__m128i *)(s - 0 * p), q0);
|
_mm_store_si128((__m128i *)(s - 0 * p), q0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p,
|
void vpx_highbd_lpf_horizontal_16_dual_sse2(uint16_t *s, int p,
|
||||||
const uint8_t *_blimit,
|
const uint8_t *_blimit,
|
||||||
const uint8_t *_limit,
|
const uint8_t *_limit,
|
||||||
const uint8_t *_thresh, int bd) {
|
const uint8_t *_thresh, int bd) {
|
||||||
vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd);
|
vpx_highbd_lpf_horizontal_16_sse2(s, p, _blimit, _limit, _thresh, bd);
|
||||||
vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
|
vpx_highbd_lpf_horizontal_16_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
|
void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
|
||||||
@@ -1108,8 +1108,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
|
|||||||
highbd_transpose(src, p, dst, 8, 2);
|
highbd_transpose(src, p, dst, 8, 2);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh,
|
vpx_highbd_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh,
|
||||||
bd);
|
bd);
|
||||||
src[0] = t_dst;
|
src[0] = t_dst;
|
||||||
src[1] = t_dst + 8 * 8;
|
src[1] = t_dst + 8 * 8;
|
||||||
dst[0] = s - 8;
|
dst[0] = s - 8;
|
||||||
@@ -1130,7 +1130,7 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p,
|
|||||||
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit,
|
vpx_highbd_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit,
|
||||||
thresh, bd);
|
thresh, bd);
|
||||||
|
|
||||||
// Transpose back
|
// Transpose back
|
||||||
|
|||||||
@@ -13,10 +13,10 @@
|
|||||||
#include "./vpx_dsp_rtcd.h"
|
#include "./vpx_dsp_rtcd.h"
|
||||||
#include "vpx_ports/mem.h"
|
#include "vpx_ports/mem.h"
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p,
|
void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
|
||||||
const unsigned char *_blimit,
|
const unsigned char *_blimit,
|
||||||
const unsigned char *_limit,
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh) {
|
const unsigned char *_thresh) {
|
||||||
__m128i mask, hev, flat, flat2;
|
__m128i mask, hev, flat, flat2;
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i one = _mm_set1_epi8(1);
|
const __m128i one = _mm_set1_epi8(1);
|
||||||
@@ -367,7 +367,7 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = {
|
|||||||
8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
|
8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
|
||||||
};
|
};
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p,
|
void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p,
|
||||||
const unsigned char *_blimit,
|
const unsigned char *_blimit,
|
||||||
const unsigned char *_limit,
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh) {
|
const unsigned char *_thresh) {
|
||||||
|
|||||||
@@ -229,10 +229,10 @@ void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */,
|
|||||||
*(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
|
*(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
|
void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
|
||||||
const unsigned char *_blimit,
|
const unsigned char *_blimit,
|
||||||
const unsigned char *_limit,
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh) {
|
const unsigned char *_thresh) {
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i one = _mm_set1_epi8(1);
|
const __m128i one = _mm_set1_epi8(1);
|
||||||
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
|
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
|
||||||
@@ -591,7 +591,7 @@ static INLINE __m128i filter16_mask(const __m128i *const flat,
|
|||||||
return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
|
return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
|
void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p,
|
||||||
const unsigned char *_blimit,
|
const unsigned char *_blimit,
|
||||||
const unsigned char *_limit,
|
const unsigned char *_limit,
|
||||||
const unsigned char *_thresh) {
|
const unsigned char *_thresh) {
|
||||||
@@ -1745,7 +1745,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
|
|||||||
transpose(src, p, dst, 8, 2);
|
transpose(src, p, dst, 8, 2);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
|
vpx_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
|
||||||
|
|
||||||
src[0] = t_dst;
|
src[0] = t_dst;
|
||||||
src[1] = t_dst + 8 * 8;
|
src[1] = t_dst + 8 * 8;
|
||||||
@@ -1766,7 +1766,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
|
|||||||
transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
||||||
|
|
||||||
// Loop filtering
|
// Loop filtering
|
||||||
vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
|
vpx_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
|
||||||
|
|
||||||
// Transpose back
|
// Transpose back
|
||||||
transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
|
transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
|
||||||
|
|||||||
Reference in New Issue
Block a user