Split vpx_lpf_horizontal_16 in two
Replace it with vpx_lpf_horizontal_edge_16 and vpx_lpf_horizontal_edge_8 to avoid passing a count parameter.
Change-Id: I848c95c02a3c6ebaa6c2bdf0983dce05cd645271
This commit is contained in:
@@ -523,8 +523,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
::testing::Values(
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_sse2>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
||||
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
|
||||
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_sse2>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_sse2>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_sse2>,
|
||||
&wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_16_sse2>,
|
||||
@@ -538,9 +540,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
AVX2, Loop8Test6Param,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
|
||||
make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
|
||||
2)));
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_avx2>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_avx2>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2
|
||||
@@ -597,10 +600,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#if HAVE_NEON_ASM
|
||||
// Using #if inside the macro is unsupported on MSVS but the tests are not
|
||||
// currently built for MSVS with ARM and NEON.
|
||||
make_tuple(&vpx_lpf_horizontal_16_neon,
|
||||
&vpx_lpf_horizontal_16_c, 8, 1),
|
||||
make_tuple(&vpx_lpf_horizontal_16_neon,
|
||||
&vpx_lpf_horizontal_16_c, 8, 2),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_neon>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_neon>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_16_neon>,
|
||||
&wrapper_nc<vpx_lpf_vertical_16_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_16_dual_neon>,
|
||||
@@ -638,10 +641,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_dspr2>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
||||
make_tuple(&vpx_lpf_horizontal_16_dspr2,
|
||||
&vpx_lpf_horizontal_16_c, 8, 1),
|
||||
make_tuple(&vpx_lpf_horizontal_16_dspr2,
|
||||
&vpx_lpf_horizontal_16_c, 8, 2),
|
||||
// Compare the DSPR2 edge filters against the C reference; passing the same
// symbol on both sides would make the comparison vacuous.
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_dspr2>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_dspr2>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_4_dspr2>,
|
||||
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_dspr2>,
|
||||
@@ -672,8 +675,10 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_msa>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
|
||||
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
|
||||
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_msa>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_msa>,
|
||||
&wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_4_msa>,
|
||||
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1),
|
||||
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_msa>,
|
||||
|
@@ -512,12 +512,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
||||
if (mask & 1) {
|
||||
if (mask_16x16 & 1) {
|
||||
if ((mask_16x16 & 3) == 3) {
|
||||
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
count = 2;
|
||||
} else {
|
||||
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
}
|
||||
} else if (mask_8x8 & 1) {
|
||||
if ((mask_8x8 & 3) == 3) {
|
||||
|
@@ -512,12 +512,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
||||
if (mask & 1) {
|
||||
if (mask_16x16 & 1) {
|
||||
if ((mask_16x16 & 3) == 3) {
|
||||
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 2);
|
||||
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
count = 2;
|
||||
} else {
|
||||
vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, 1);
|
||||
vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
}
|
||||
} else if (mask_8x8 & 1) {
|
||||
if ((mask_8x8 & 3) == 3) {
|
||||
|
@@ -8,27 +8,28 @@
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vpx_lpf_horizontal_16_neon|
|
||||
EXPORT |vpx_lpf_horizontal_edge_8_neon|
|
||||
EXPORT |vpx_lpf_horizontal_edge_16_neon|
|
||||
EXPORT |vpx_lpf_vertical_16_neon|
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh
|
||||
; int count)
|
||||
; void mb_lpf_horizontal_edge(uint8_t *s, int p,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh,
|
||||
; int count)
|
||||
; r0 uint8_t *s,
|
||||
; r1 int p, /* pitch */
|
||||
; r2 const uint8_t *blimit,
|
||||
; r3 const uint8_t *limit,
|
||||
; sp const uint8_t *thresh,
|
||||
|vpx_lpf_horizontal_16_neon| PROC
|
||||
; r12 int count
|
||||
|mb_lpf_horizontal_edge| PROC
|
||||
push {r4-r8, lr}
|
||||
vpush {d8-d15}
|
||||
ldr r4, [sp, #88] ; load thresh
|
||||
ldr r12, [sp, #92] ; load count
|
||||
|
||||
h_count
|
||||
vld1.8 {d16[]}, [r2] ; load *blimit
|
||||
@@ -115,7 +116,35 @@ h_next
|
||||
vpop {d8-d15}
|
||||
pop {r4-r8, pc}
|
||||
|
||||
ENDP ; |vpx_lpf_horizontal_16_neon|
|
||||
ENDP ; |mb_lpf_horizontal_edge|
|
||||
|
||||
; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh)
|
||||
; r0 uint8_t *s,
|
||||
; r1 int pitch,
|
||||
; r2 const uint8_t *blimit,
|
||||
; r3 const uint8_t *limit,
|
||||
; sp const uint8_t *thresh
|
||||
|vpx_lpf_horizontal_edge_8_neon| PROC
|
||||
mov r12, #1
|
||||
b mb_lpf_horizontal_edge
|
||||
ENDP ; |vpx_lpf_horizontal_edge_8_neon|
|
||||
|
||||
; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh)
|
||||
; r0 uint8_t *s,
|
||||
; r1 int pitch,
|
||||
; r2 const uint8_t *blimit,
|
||||
; r3 const uint8_t *limit,
|
||||
; sp const uint8_t *thresh
|
||||
|vpx_lpf_horizontal_edge_16_neon| PROC
|
||||
mov r12, #2
|
||||
b mb_lpf_horizontal_edge
|
||||
ENDP ; |vpx_lpf_horizontal_edge_16_neon|
|
||||
|
||||
; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
|
||||
; const uint8_t *blimit,
|
||||
|
@@ -289,9 +289,9 @@ static INLINE void filter16(int8_t mask, uint8_t thresh,
|
||||
}
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh,
|
||||
int count) {
|
||||
static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
||||
const uint8_t *limit,
|
||||
const uint8_t *thresh, int count) {
|
||||
int i;
|
||||
|
||||
// loop filter designed to work using chars so that we can make maximum use
|
||||
@@ -315,6 +315,16 @@ void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||
}
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
|
||||
}
|
||||
|
||||
static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
|
||||
const uint8_t *blimit,
|
||||
const uint8_t *limit,
|
||||
|
@@ -423,11 +423,11 @@ void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
|
||||
}
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit_ptr,
|
||||
const uint8_t *limit_ptr,
|
||||
const uint8_t *thresh_ptr,
|
||||
int32_t count) {
|
||||
static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit_ptr,
|
||||
const uint8_t *limit_ptr,
|
||||
const uint8_t *thresh_ptr,
|
||||
int32_t count) {
|
||||
if (1 == count) {
|
||||
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
|
||||
uint64_t dword0, dword1;
|
||||
@@ -648,6 +648,20 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
|
||||
}
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit_ptr,
|
||||
const uint8_t *limit_ptr,
|
||||
const uint8_t *thresh_ptr) {
|
||||
mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit_ptr,
|
||||
const uint8_t *limit_ptr,
|
||||
const uint8_t *thresh_ptr) {
|
||||
mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2);
|
||||
}
|
||||
|
||||
static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
|
||||
uint8_t *output, int32_t out_pitch) {
|
||||
v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
|
||||
|
@@ -19,12 +19,12 @@
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
|
||||
int pitch,
|
||||
const uint8_t *blimit,
|
||||
const uint8_t *limit,
|
||||
const uint8_t *thresh,
|
||||
int count) {
|
||||
static void mb_lpf_horizontal_edge(unsigned char *s,
|
||||
int pitch,
|
||||
const uint8_t *blimit,
|
||||
const uint8_t *limit,
|
||||
const uint8_t *thresh,
|
||||
int count) {
|
||||
uint32_t mask;
|
||||
uint32_t hev, flat, flat2;
|
||||
uint8_t i;
|
||||
@@ -791,4 +791,18 @@ void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
|
||||
s = s + 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch,
|
||||
const uint8_t *blimit,
|
||||
const uint8_t *limit,
|
||||
const uint8_t *thresh) {
|
||||
mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch,
|
||||
const uint8_t *blimit,
|
||||
const uint8_t *limit,
|
||||
const uint8_t *thresh) {
|
||||
mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2);
|
||||
}
|
||||
#endif // #if HAVE_DSPR2
|
||||
|
@@ -548,9 +548,13 @@ specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
|
||||
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
|
||||
specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
|
||||
$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
|
||||
add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
|
||||
$vpx_lpf_horizontal_edge_8_neon_asm=vpx_lpf_horizontal_edge_8_neon;
|
||||
|
||||
add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
|
||||
$vpx_lpf_horizontal_edge_16_neon_asm=vpx_lpf_horizontal_edge_16_neon;
|
||||
|
||||
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
|
||||
|
@@ -13,9 +13,10 @@
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
|
||||
const unsigned char *_blimit, const unsigned char *_limit,
|
||||
const unsigned char *_thresh) {
|
||||
void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p,
|
||||
const unsigned char *_blimit,
|
||||
const unsigned char *_limit,
|
||||
const unsigned char *_thresh) {
|
||||
__m128i mask, hev, flat, flat2;
|
||||
const __m128i zero = _mm_set1_epi16(0);
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
@@ -400,9 +401,10 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = {
|
||||
8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
|
||||
};
|
||||
|
||||
static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
|
||||
const unsigned char *_blimit, const unsigned char *_limit,
|
||||
const unsigned char *_thresh) {
|
||||
void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p,
|
||||
const unsigned char *_blimit,
|
||||
const unsigned char *_limit,
|
||||
const unsigned char *_thresh) {
|
||||
__m128i mask, hev, flat, flat2;
|
||||
const __m128i zero = _mm_set1_epi16(0);
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
@@ -975,12 +977,3 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
|
||||
_mm_storeu_si128((__m128i *) (s + 6 * p), q6);
|
||||
}
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
|
||||
const unsigned char *_blimit, const unsigned char *_limit,
|
||||
const unsigned char *_thresh, int count) {
|
||||
if (count == 1)
|
||||
mb_lpf_horizontal_edge_w_avx2_8(s, p, _blimit, _limit, _thresh);
|
||||
else
|
||||
mb_lpf_horizontal_edge_w_avx2_16(s, p, _blimit, _limit, _thresh);
|
||||
}
|
||||
|
@@ -18,11 +18,10 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) {
|
||||
return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
|
||||
}
|
||||
|
||||
static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *_blimit,
|
||||
const unsigned char *_limit,
|
||||
const unsigned char *_thresh) {
|
||||
void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
|
||||
const unsigned char *_blimit,
|
||||
const unsigned char *_limit,
|
||||
const unsigned char *_thresh) {
|
||||
const __m128i zero = _mm_set1_epi16(0);
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
|
||||
@@ -383,11 +382,10 @@ static INLINE __m128i filter16_mask(const __m128i *const flat,
|
||||
return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
|
||||
}
|
||||
|
||||
static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *_blimit,
|
||||
const unsigned char *_limit,
|
||||
const unsigned char *_thresh) {
|
||||
void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
|
||||
const unsigned char *_blimit,
|
||||
const unsigned char *_limit,
|
||||
const unsigned char *_thresh) {
|
||||
const __m128i zero = _mm_set1_epi16(0);
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
|
||||
@@ -716,17 +714,6 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
|
||||
void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
|
||||
const unsigned char *_blimit,
|
||||
const unsigned char *_limit,
|
||||
const unsigned char *_thresh, int count) {
|
||||
if (count == 1)
|
||||
mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
|
||||
else
|
||||
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
|
||||
const unsigned char *_blimit,
|
||||
const unsigned char *_limit,
|
||||
@@ -1554,7 +1541,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
|
||||
transpose(src, p, dst, 8, 2);
|
||||
|
||||
// Loop filtering
|
||||
mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, thresh);
|
||||
vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
|
||||
|
||||
src[0] = t_dst;
|
||||
src[1] = t_dst + 8 * 8;
|
||||
@@ -1575,8 +1562,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
|
||||
transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
|
||||
|
||||
// Loop filtering
|
||||
mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit,
|
||||
thresh);
|
||||
vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
|
||||
|
||||
// Transpose back
|
||||
transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
|
||||
|
Reference in New Issue
Block a user