vpx_lpf_horizontal_4: remove unused count param
Change-Id: Iec7d8eda343991f7d7d46931dca17af23c821d11
This commit is contained in:
@@ -458,7 +458,8 @@ using std::tr1::make_tuple;
|
|||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
MMX, Loop8Test6Param,
|
MMX, Loop8Test6Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1),
|
make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_mmx>,
|
||||||
|
&wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
|
||||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_4_mmx>,
|
make_tuple(&wrapper_nc<vpx_lpf_vertical_4_mmx>,
|
||||||
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
|
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
|
||||||
#endif // HAVE_MMX
|
#endif // HAVE_MMX
|
||||||
@@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
||||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_neon>,
|
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_neon>,
|
||||||
&wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
|
&wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
|
||||||
make_tuple(&vpx_lpf_horizontal_4_neon,
|
make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_neon>,
|
||||||
&vpx_lpf_horizontal_4_c, 8, 1),
|
&wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
|
||||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_4_neon>,
|
make_tuple(&wrapper_nc<vpx_lpf_vertical_4_neon>,
|
||||||
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
|
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
@@ -633,7 +634,8 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
DSPR2, Loop8Test6Param,
|
DSPR2, Loop8Test6Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8, 1),
|
make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_dspr2>,
|
||||||
|
&wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
|
||||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_dspr2>,
|
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_dspr2>,
|
||||||
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
||||||
make_tuple(&vpx_lpf_horizontal_16_dspr2,
|
make_tuple(&vpx_lpf_horizontal_16_dspr2,
|
||||||
@@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
MSA, Loop8Test6Param,
|
MSA, Loop8Test6Param,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8, 1),
|
make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_msa>,
|
||||||
|
&wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
|
||||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_msa>,
|
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_msa>,
|
||||||
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
||||||
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
|
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
|
||||||
|
@@ -535,10 +535,10 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
else if (mask_4x4_int & 2)
|
else if (mask_4x4_int & 2)
|
||||||
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1);
|
lfin->lim, lfin->hev_thr);
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
@@ -546,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
|
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
if ((mask_4x4 & 3) == 3) {
|
if ((mask_4x4 & 3) == 3) {
|
||||||
@@ -563,22 +563,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
else if (mask_4x4_int & 2)
|
else if (mask_4x4_int & 2)
|
||||||
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1);
|
lfin->lim, lfin->hev_thr);
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
|
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_4x4_int & 1) {
|
} else if (mask_4x4_int & 1) {
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s += 8 * count;
|
s += 8 * count;
|
||||||
|
@@ -535,10 +535,10 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
else if (mask_4x4_int & 2)
|
else if (mask_4x4_int & 2)
|
||||||
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1);
|
lfin->lim, lfin->hev_thr);
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
@@ -546,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
|
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_4x4 & 1) {
|
} else if (mask_4x4 & 1) {
|
||||||
if ((mask_4x4 & 3) == 3) {
|
if ((mask_4x4 & 3) == 3) {
|
||||||
@@ -563,22 +563,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||||||
} else {
|
} else {
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
else if (mask_4x4_int & 2)
|
else if (mask_4x4_int & 2)
|
||||||
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
|
||||||
lfin->lim, lfin->hev_thr, 1);
|
lfin->lim, lfin->hev_thr);
|
||||||
}
|
}
|
||||||
count = 2;
|
count = 2;
|
||||||
} else {
|
} else {
|
||||||
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
|
|
||||||
if (mask_4x4_int & 1)
|
if (mask_4x4_int & 1)
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
} else if (mask_4x4_int & 1) {
|
} else if (mask_4x4_int & 1) {
|
||||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||||
lfi->hev_thr, 1);
|
lfi->hev_thr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s += 8 * count;
|
s += 8 * count;
|
||||||
|
@@ -16,37 +16,28 @@
|
|||||||
|
|
||||||
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
|
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
|
||||||
; works on 16 iterations at a time.
|
; works on 16 iterations at a time.
|
||||||
; TODO(fgalligan): See about removing the count code as this function is only
|
|
||||||
; called with a count of 1.
|
|
||||||
;
|
;
|
||||||
; void vpx_lpf_horizontal_4_neon(uint8_t *s,
|
; void vpx_lpf_horizontal_4_neon(uint8_t *s,
|
||||||
; int p /* pitch */,
|
; int p /* pitch */,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
; const uint8_t *limit,
|
; const uint8_t *limit,
|
||||||
; const uint8_t *thresh,
|
; const uint8_t *thresh)
|
||||||
; int count)
|
|
||||||
;
|
;
|
||||||
; r0 uint8_t *s,
|
; r0 uint8_t *s,
|
||||||
; r1 int p, /* pitch */
|
; r1 int p, /* pitch */
|
||||||
; r2 const uint8_t *blimit,
|
; r2 const uint8_t *blimit,
|
||||||
; r3 const uint8_t *limit,
|
; r3 const uint8_t *limit,
|
||||||
; sp const uint8_t *thresh,
|
; sp const uint8_t *thresh,
|
||||||
; sp+4 int count
|
|
||||||
|vpx_lpf_horizontal_4_neon| PROC
|
|vpx_lpf_horizontal_4_neon| PROC
|
||||||
push {lr}
|
push {lr}
|
||||||
|
|
||||||
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
vld1.8 {d0[]}, [r2] ; duplicate *blimit
|
||||||
ldr r12, [sp, #8] ; load count
|
|
||||||
ldr r2, [sp, #4] ; load thresh
|
ldr r2, [sp, #4] ; load thresh
|
||||||
add r1, r1, r1 ; double pitch
|
add r1, r1, r1 ; double pitch
|
||||||
|
|
||||||
cmp r12, #0
|
|
||||||
beq end_vpx_lf_h_edge
|
|
||||||
|
|
||||||
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
vld1.8 {d1[]}, [r3] ; duplicate *limit
|
||||||
vld1.8 {d2[]}, [r2] ; duplicate *thresh
|
vld1.8 {d2[]}, [r2] ; duplicate *thresh
|
||||||
|
|
||||||
count_lf_h_loop
|
|
||||||
sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
||||||
add r3, r2, r1, lsr #1 ; set to 3 lines down
|
add r3, r2, r1, lsr #1 ; set to 3 lines down
|
||||||
|
|
||||||
@@ -69,11 +60,6 @@ count_lf_h_loop
|
|||||||
vst1.u8 {d6}, [r2@64], r1 ; store oq0
|
vst1.u8 {d6}, [r2@64], r1 ; store oq0
|
||||||
vst1.u8 {d7}, [r3@64], r1 ; store oq1
|
vst1.u8 {d7}, [r3@64], r1 ; store oq1
|
||||||
|
|
||||||
add r0, r0, #8
|
|
||||||
subs r12, r12, #1
|
|
||||||
bne count_lf_h_loop
|
|
||||||
|
|
||||||
end_vpx_lf_h_edge
|
|
||||||
pop {pc}
|
pop {pc}
|
||||||
ENDP ; |vpx_lpf_horizontal_4_neon|
|
ENDP ; |vpx_lpf_horizontal_4_neon|
|
||||||
|
|
||||||
|
@@ -115,22 +115,18 @@ void vpx_lpf_horizontal_4_neon(
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
int i;
|
int i;
|
||||||
uint8_t *s, *psrc;
|
uint8_t *s, *psrc;
|
||||||
uint8x8_t dblimit, dlimit, dthresh;
|
uint8x8_t dblimit, dlimit, dthresh;
|
||||||
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
|
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
|
||||||
|
|
||||||
if (count == 0) // end_vpx_lf_h_edge
|
|
||||||
return;
|
|
||||||
|
|
||||||
dblimit = vld1_u8(blimit);
|
dblimit = vld1_u8(blimit);
|
||||||
dlimit = vld1_u8(limit);
|
dlimit = vld1_u8(limit);
|
||||||
dthresh = vld1_u8(thresh);
|
dthresh = vld1_u8(thresh);
|
||||||
|
|
||||||
psrc = src - (pitch << 2);
|
psrc = src - (pitch << 2);
|
||||||
for (i = 0; i < count; i++) {
|
for (i = 0; i < 1; i++) {
|
||||||
s = psrc + i * 8;
|
s = psrc + i * 8;
|
||||||
|
|
||||||
d3u8 = vld1_u8(s);
|
d3u8 = vld1_u8(s);
|
||||||
|
@@ -119,12 +119,12 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1,
|
|||||||
|
|
||||||
void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
|
void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
|
||||||
const uint8_t *blimit, const uint8_t *limit,
|
const uint8_t *blimit, const uint8_t *limit,
|
||||||
const uint8_t *thresh, int count) {
|
const uint8_t *thresh) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
||||||
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
||||||
const int8_t mask = filter_mask(*limit, *blimit,
|
const int8_t mask = filter_mask(*limit, *blimit,
|
||||||
@@ -138,8 +138,8 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
|
|||||||
const uint8_t *limit0, const uint8_t *thresh0,
|
const uint8_t *limit0, const uint8_t *thresh0,
|
||||||
const uint8_t *blimit1, const uint8_t *limit1,
|
const uint8_t *blimit1, const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
||||||
|
@@ -13,14 +13,11 @@
|
|||||||
void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
|
void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
|
||||||
const uint8_t *b_limit_ptr,
|
const uint8_t *b_limit_ptr,
|
||||||
const uint8_t *limit_ptr,
|
const uint8_t *limit_ptr,
|
||||||
const uint8_t *thresh_ptr,
|
const uint8_t *thresh_ptr) {
|
||||||
int32_t count) {
|
|
||||||
uint64_t p1_d, p0_d, q0_d, q1_d;
|
uint64_t p1_d, p0_d, q0_d, q1_d;
|
||||||
v16u8 mask, hev, flat, thresh, b_limit, limit;
|
v16u8 mask, hev, flat, thresh, b_limit, limit;
|
||||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
|
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
|
||||||
|
|
||||||
(void)count;
|
|
||||||
|
|
||||||
/* load vector elements */
|
/* load vector elements */
|
||||||
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
|
||||||
|
|
||||||
|
@@ -23,8 +23,7 @@ void vpx_lpf_horizontal_4_dspr2(unsigned char *s,
|
|||||||
int pitch,
|
int pitch,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh,
|
const uint8_t *thresh) {
|
||||||
int count) {
|
|
||||||
uint8_t i;
|
uint8_t i;
|
||||||
uint32_t mask;
|
uint32_t mask;
|
||||||
uint32_t hev;
|
uint32_t hev;
|
||||||
@@ -312,8 +311,8 @@ void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
|
|||||||
const uint8_t *blimit1,
|
const uint8_t *blimit1,
|
||||||
const uint8_t *limit1,
|
const uint8_t *limit1,
|
||||||
const uint8_t *thresh1) {
|
const uint8_t *thresh1) {
|
||||||
vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
|
vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
|
vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
|
void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
|
||||||
|
@@ -559,7 +559,7 @@ add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint
|
|||||||
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
|
||||||
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
|
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||||
|
@@ -18,14 +18,13 @@
|
|||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *blimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
|
global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
|
||||||
sym(vpx_lpf_horizontal_4_mmx):
|
sym(vpx_lpf_horizontal_4_mmx):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 5
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
@@ -39,8 +38,6 @@ sym(vpx_lpf_horizontal_4_mmx):
|
|||||||
mov rsi, arg(0) ;src_ptr
|
mov rsi, arg(0) ;src_ptr
|
||||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||||
|
|
||||||
movsxd rcx, dword ptr arg(5) ;count
|
|
||||||
.next8_h:
|
|
||||||
mov rdx, arg(3) ;limit
|
mov rdx, arg(3) ;limit
|
||||||
movq mm7, [rdx]
|
movq mm7, [rdx]
|
||||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||||
@@ -208,11 +205,6 @@ sym(vpx_lpf_horizontal_4_mmx):
|
|||||||
pxor mm7, [GLOBAL(t80)] ; unoffset
|
pxor mm7, [GLOBAL(t80)] ; unoffset
|
||||||
movq [rdi], mm7 ; write back
|
movq [rdi], mm7 ; write back
|
||||||
|
|
||||||
add rsi,8
|
|
||||||
neg rax
|
|
||||||
dec rcx
|
|
||||||
jnz .next8_h
|
|
||||||
|
|
||||||
add rsp, 32
|
add rsp, 32
|
||||||
pop rsp
|
pop rsp
|
||||||
; begin epilog
|
; begin epilog
|
||||||
|
Reference in New Issue
Block a user