Merge remote branch 'internal/upstream' into HEAD

This commit is contained in:
John Koleszar
2011-01-22 00:05:13 -05:00
3 changed files with 328 additions and 113 deletions

View File

@@ -1316,6 +1316,43 @@ void vp8_end_second_pass(VP8_COMP *cpi)
{ {
} }
// Estimate how quickly prediction quality is believed to be decaying
// from one frame to the next, based on the first pass stats of the
// supplied frame.
//
// Three candidate rates are formed and the smallest one is returned:
//   - the % of mbs that were inter coded (pcnt_inter),
//   - a cap that drops slightly as the % of motion (pcnt_motion) rises,
//   - a cap that drops as the motion-weighted vector magnitude grows,
//     reaching 0.0 once the scaled magnitude exceeds 1.0.
//
// cpi is accepted for interface reasons but is not read here.
double gf_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
{
    const double pct_moving = next_frame->pcnt_motion;

    // Starting point: the % of mbs inter coded
    double decay_rate = next_frame->pcnt_inter;

    // More motion -> a somewhat lower ceiling on the rate
    const double motion_cap = (1.0 - (pct_moving / 20.0));

    // Absolute row / column motion vector stats, weighted by the
    // proportion of the frame that is actually moving
    const double row_mag = fabs(next_frame->mvr_abs * pct_moving);
    const double col_mag = fabs(next_frame->mvc_abs * pct_moving);

    // Combined magnitude, scaled by 250.0 and then inverted so that
    // faster motion gives a lower ceiling
    double speed_cap = sqrt((row_mag * row_mag) +
                            (col_mag * col_mag)) / 250.0;

    speed_cap = ((speed_cap > 1.0) ? 0.0 : (1.0 - speed_cap));

    // Keep the most pessimistic of the three estimates
    if (motion_cap < decay_rate)
        decay_rate = motion_cap;

    if (speed_cap < decay_rate)
        decay_rate = speed_cap;

    return decay_rate;
}
// Analyse and define a gf/arf group . // Analyse and define a gf/arf group .
static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
{ {
@@ -1468,36 +1505,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
if (r > GF_RMAX) if (r > GF_RMAX)
r = GF_RMAX; r = GF_RMAX;
// Adjust loop decay rate loop_decay_rate = gf_prediction_decay_rate(cpi, &next_frame);
//if ( next_frame.pcnt_inter < loop_decay_rate )
loop_decay_rate = next_frame.pcnt_inter;
// High % motion -> somewhat higher decay rate
motion_decay = (1.0 - (motion_pct / 20.0));
if (motion_decay < loop_decay_rate)
loop_decay_rate = motion_decay;
// Adjustment to decay rate based on speed of motion
{
double this_mv_rabs;
double this_mv_cabs;
double distance_factor;
this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct);
this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct);
distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
(this_mv_cabs * this_mv_cabs)) / 250.0;
distance_factor = ((distance_factor > 1.0)
? 0.0 : (1.0 - distance_factor));
if (distance_factor < loop_decay_rate)
loop_decay_rate = distance_factor;
}
// Cumulative effect of decay // Cumulative effect of decay
decay_accumulator = decay_accumulator * loop_decay_rate; decay_accumulator = decay_accumulator * loop_decay_rate;
decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
//decay_accumulator = ( loop_decay_rate < decay_accumulator ) ? loop_decay_rate : decay_accumulator;
boost_score += (decay_accumulator * r); boost_score += (decay_accumulator * r);
@@ -1508,11 +1520,42 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
(loop_decay_rate >= 0.999) && (loop_decay_rate >= 0.999) &&
(decay_accumulator < 0.9) ) (decay_accumulator < 0.9) )
{ {
// Force GF not alt ref int j;
allow_alt_ref = FALSE; FIRSTPASS_STATS * position = cpi->stats_in;
FIRSTPASS_STATS tmp_next_frame;
double decay_rate;
boost_score = old_boost_score; // Look ahead a few frames to see if static condition
break; // persists...
for ( j = 0; j < 4; j++ )
{
if (EOF == vp8_input_stats(cpi, &tmp_next_frame))
break;
decay_rate = gf_prediction_decay_rate(cpi, &tmp_next_frame);
if ( decay_rate < 0.999 )
break;
}
reset_fpf_position(cpi, position); // Reset file position
// Force GF not alt ref
if ( j == 4 )
{
if (0)
{
FILE *f = fopen("fadegf.stt", "a");
fprintf(f, " %8d %8d %10.4f %10.4f %10.4f\n",
cpi->common.current_video_frame+i, i,
loop_decay_rate, decay_accumulator,
boost_score );
fclose(f);
}
allow_alt_ref = FALSE;
boost_score = old_boost_score;
break;
}
} }
// Break out conditions. // Break out conditions.

View File

@@ -493,8 +493,8 @@ sym(vp8_get8x8var_sse2):
; unsigned char *src_ptr, ; unsigned char *src_ptr,
; int src_pixels_per_line, ; int src_pixels_per_line,
; unsigned int Height, ; unsigned int Height,
; unsigned short *HFilter, ; int xoffset,
; unsigned short *VFilter, ; int yoffset,
; int *sum, ; int *sum,
; unsigned int *sumsquared;; ; unsigned int *sumsquared;;
; ;
@@ -504,68 +504,80 @@ sym(vp8_filter_block2d_bil_var_sse2):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 9 SHADOW_ARGS_TO_STACK 9
SAVE_XMM
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
sub rsp, 16 push rbx
; end prolog ; end prolog
pxor xmm6, xmm6 ; pxor xmm6, xmm6 ;
pxor xmm7, xmm7 ; pxor xmm7, xmm7 ;
mov rax, arg(5) ;HFilter ;
mov rdx, arg(6) ;VFilter ; lea rsi, [GLOBAL(xmm_bi_rd)] ; rounding
mov rsi, arg(0) ;ref_ptr ; movdqa xmm4, XMMWORD PTR [rsi]
mov rdi, arg(2) ;src_ptr ; lea rcx, [GLOBAL(vp8_bilinear_filters_sse2)]
movsxd rcx, dword ptr arg(4) ;Height ; movsxd rax, dword ptr arg(5) ; xoffset
cmp rax, 0 ; skip first_pass filter if xoffset=0
je filter_block2d_bil_var_sse2_sp_only
shl rax, 5 ; point to filter coeff with xoffset
lea rax, [rax + rcx] ; HFilter
movsxd rdx, dword ptr arg(6) ; yoffset
cmp rdx, 0 ; skip second_pass filter if yoffset=0
je filter_block2d_bil_var_sse2_fp_only
shl rdx, 5
lea rdx, [rdx + rcx] ; VFilter
mov rsi, arg(0) ;ref_ptr
mov rdi, arg(2) ;src_ptr
movsxd rcx, dword ptr arg(4) ;Height
pxor xmm0, xmm0 ; pxor xmm0, xmm0 ;
movq xmm1, QWORD PTR [rsi] ; movq xmm1, QWORD PTR [rsi] ;
movq xmm3, QWORD PTR [rsi+1] ;
movq xmm3, QWORD PTR [rsi+1] ;
punpcklbw xmm1, xmm0 ; punpcklbw xmm1, xmm0 ;
pmullw xmm1, [rax] ;
pmullw xmm1, [rax] ;
punpcklbw xmm3, xmm0 punpcklbw xmm3, xmm0
;
pmullw xmm3, [rax+16] ; pmullw xmm3, [rax+16] ;
paddw xmm1, xmm3 ; paddw xmm1, xmm3 ;
paddw xmm1, xmm4 ;
paddw xmm1, [GLOBAL(xmm_bi_rd)] ; psraw xmm1, xmm_filter_shift ;
psraw xmm1, xmm_filter_shift ;
movdqa xmm5, xmm1 movdqa xmm5, xmm1
%if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
%else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
add rsi, r8
%endif
filter_block2d_bil_var_sse2_loop:
movsxd rbx, dword ptr arg(1) ;ref_pixels_per_line
lea rsi, [rsi + rbx]
%if ABI_IS_32BIT=0
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
%endif
filter_block2d_bil_var_sse2_loop:
movq xmm1, QWORD PTR [rsi] ; movq xmm1, QWORD PTR [rsi] ;
movq xmm3, QWORD PTR [rsi+1] ; movq xmm3, QWORD PTR [rsi+1] ;
punpcklbw xmm1, xmm0 ; punpcklbw xmm1, xmm0 ;
pmullw xmm1, [rax] ; pmullw xmm1, [rax] ;
punpcklbw xmm3, xmm0 ; punpcklbw xmm3, xmm0 ;
pmullw xmm3, [rax+16] ; pmullw xmm3, [rax+16] ;
paddw xmm1, xmm3 ; paddw xmm1, xmm3 ;
paddw xmm1, [GLOBAL(xmm_bi_rd)] ; paddw xmm1, xmm4 ;
psraw xmm1, xmm_filter_shift ; psraw xmm1, xmm_filter_shift ;
movdqa xmm3, xmm5 ; movdqa xmm3, xmm5 ;
movdqa xmm5, xmm1 ; movdqa xmm5, xmm1 ;
pmullw xmm3, [rdx] ;
pmullw xmm3, [rdx] ;
pmullw xmm1, [rdx+16] ; pmullw xmm1, [rdx+16] ;
paddw xmm1, xmm3 ; paddw xmm1, xmm3 ;
paddw xmm1, xmm4 ;
paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
psraw xmm1, xmm_filter_shift ; psraw xmm1, xmm_filter_shift ;
movq xmm3, QWORD PTR [rdi] ; movq xmm3, QWORD PTR [rdi] ;
@@ -577,20 +589,103 @@ filter_block2d_bil_var_sse2_loop:
pmaddwd xmm1, xmm1 ; pmaddwd xmm1, xmm1 ;
paddd xmm7, xmm1 ; paddd xmm7, xmm1 ;
lea rsi, [rsi + rbx] ;ref_pixels_per_line
%if ABI_IS_32BIT %if ABI_IS_32BIT
add rsi, dword ptr arg(1) ;ref_pixels_per_line ; add rdi, dword ptr arg(3) ;src_pixels_per_line
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
%else %else
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; lea rdi, [rdi + r9]
movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
add rsi, r8
add rdi, r9
%endif %endif
sub rcx, 1 ; sub rcx, 1 ;
jnz filter_block2d_bil_var_sse2_loop ; jnz filter_block2d_bil_var_sse2_loop ;
jmp filter_block2d_bil_variance
filter_block2d_bil_var_sse2_sp_only:
movsxd rdx, dword ptr arg(6) ; yoffset
shl rdx, 5
lea rdx, [rdx + rcx] ; VFilter
mov rsi, arg(0) ;ref_ptr
mov rdi, arg(2) ;src_ptr
movsxd rcx, dword ptr arg(4) ;Height
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
pxor xmm0, xmm0 ;
movq xmm1, QWORD PTR [rsi] ;
punpcklbw xmm1, xmm0 ;
movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
lea rsi, [rsi + rax]
filter_block2d_bil_sp_only_loop:
movq xmm3, QWORD PTR [rsi] ;
punpcklbw xmm3, xmm0 ;
movdqa xmm5, xmm3
pmullw xmm1, [rdx] ;
pmullw xmm3, [rdx+16] ;
paddw xmm1, xmm3 ;
paddw xmm1, xmm4 ;
psraw xmm1, xmm_filter_shift ;
movq xmm3, QWORD PTR [rdi] ;
punpcklbw xmm3, xmm0 ;
psubw xmm1, xmm3 ;
paddw xmm6, xmm1 ;
pmaddwd xmm1, xmm1 ;
paddd xmm7, xmm1 ;
movdqa xmm1, xmm5 ;
lea rsi, [rsi + rax] ;ref_pixels_per_line
lea rdi, [rdi + rbx] ;src_pixels_per_line
sub rcx, 1 ;
jnz filter_block2d_bil_sp_only_loop ;
jmp filter_block2d_bil_variance
filter_block2d_bil_var_sse2_fp_only:
mov rsi, arg(0) ;ref_ptr
mov rdi, arg(2) ;src_ptr
movsxd rcx, dword ptr arg(4) ;Height
movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line
pxor xmm0, xmm0 ;
movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
filter_block2d_bil_fp_only_loop:
movq xmm1, QWORD PTR [rsi] ;
movq xmm3, QWORD PTR [rsi+1] ;
punpcklbw xmm1, xmm0 ;
pmullw xmm1, [rax] ;
punpcklbw xmm3, xmm0 ;
pmullw xmm3, [rax+16] ;
paddw xmm1, xmm3 ;
paddw xmm1, xmm4 ;
psraw xmm1, xmm_filter_shift ;
movq xmm3, QWORD PTR [rdi] ;
punpcklbw xmm3, xmm0 ;
psubw xmm1, xmm3 ;
paddw xmm6, xmm1 ;
pmaddwd xmm1, xmm1 ;
paddd xmm7, xmm1 ;
lea rsi, [rsi + rdx]
lea rdi, [rdi + rbx] ;src_pixels_per_line
sub rcx, 1 ;
jnz filter_block2d_bil_fp_only_loop ;
jmp filter_block2d_bil_variance
filter_block2d_bil_variance:
movdq2q mm6, xmm6 ; movdq2q mm6, xmm6 ;
movdq2q mm7, xmm7 ; movdq2q mm7, xmm7 ;
@@ -627,12 +722,12 @@ filter_block2d_bil_var_sse2_loop:
movd [rsi], mm2 ; xsum movd [rsi], mm2 ; xsum
movd [rdi], mm4 ; xxsum movd [rdi], mm4 ; xxsum
; begin epilog ; begin epilog
add rsp, 16 pop rbx
pop rdi pop rdi
pop rsi pop rsi
RESTORE_GOT RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS UNSHADOW_ARGS
pop rbp pop rbp
ret ret
@@ -974,3 +1069,13 @@ SECTION_RODATA
align 16 align 16
xmm_bi_rd: xmm_bi_rd:
times 8 dw 64 times 8 dw 64
align 16
; Bilinear filter coefficient table: 8 rows of 16 words (32 bytes per row).
; Each row holds one coefficient repeated 8 times followed by a second
; coefficient repeated 8 times; the two coefficients of a row sum to 128.
; vp8_filter_block2d_bil_var_sse2 picks a row by shifting xoffset / yoffset
; left by 5 (offset * 32 bytes) and adding the table base, then multiplies
; by the first half at [row] and the second half at [row+16].
vp8_bilinear_filters_sse2:
dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0
dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112

View File

@@ -76,8 +76,8 @@ void vp8_filter_block2d_bil_var_sse2
const unsigned char *src_ptr, const unsigned char *src_ptr,
int src_pixels_per_line, int src_pixels_per_line,
unsigned int Height, unsigned int Height,
const short *HFilter, int xoffset,
const short *VFilter, int yoffset,
int *sum, int *sum,
unsigned int *sumsquared unsigned int *sumsquared
); );
@@ -222,21 +222,6 @@ unsigned int vp8_variance8x16_wmt
} }
///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// int one pass //
///////////////////////////////////////////////////////////////////////////
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_xmm[8][16]) =
{
{ 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
{ 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
};
unsigned int vp8_sub_pixel_variance4x4_wmt unsigned int vp8_sub_pixel_variance4x4_wmt
( (
const unsigned char *src_ptr, const unsigned char *src_ptr,
@@ -272,15 +257,38 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
unsigned int *sse unsigned int *sse
) )
{ {
int xsum; int xsum;
unsigned int xxsum; unsigned int xxsum;
vp8_filter_block2d_bil_var_sse2(
src_ptr, src_pixels_per_line, if (xoffset == 4 && yoffset == 0)
dst_ptr, dst_pixels_per_line, 8, {
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], vp8_half_horiz_variance16x_h_sse2(
&xsum, &xxsum src_ptr, src_pixels_per_line,
); dst_ptr, dst_pixels_per_line, 8,
&xsum, &xxsum);
}
else if (xoffset == 0 && yoffset == 4)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum, &xxsum);
}
else if (xoffset == 4 && yoffset == 4)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum, &xxsum);
}
else
{
vp8_filter_block2d_bil_var_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
xoffset, yoffset,
&xsum, &xxsum);
}
*sse = xxsum; *sse = xxsum;
return (xxsum - ((xsum * xsum) >> 6)); return (xxsum - ((xsum * xsum) >> 6));
@@ -344,7 +352,7 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
vp8_filter_block2d_bil_var_sse2( vp8_filter_block2d_bil_var_sse2(
src_ptr, src_pixels_per_line, src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16, dst_ptr, dst_pixels_per_line, 16,
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], xoffset, yoffset,
&xsum0, &xxsum0 &xsum0, &xxsum0
); );
@@ -352,7 +360,7 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
vp8_filter_block2d_bil_var_sse2( vp8_filter_block2d_bil_var_sse2(
src_ptr + 8, src_pixels_per_line, src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 16, dst_ptr + 8, dst_pixels_per_line, 16,
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], xoffset, yoffset,
&xsum1, &xxsum1 &xsum1, &xxsum1
); );
} }
@@ -392,21 +400,56 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
int xsum0, xsum1; int xsum0, xsum1;
unsigned int xxsum0, xxsum1; unsigned int xxsum0, xxsum1;
if (xoffset == 4 && yoffset == 0)
{
vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
vp8_filter_block2d_bil_var_sse2( vp8_half_horiz_variance16x_h_sse2(
src_ptr, src_pixels_per_line, src_ptr + 8, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8, dst_ptr + 8, dst_pixels_per_line, 8,
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], &xsum1, &xxsum1);
&xsum0, &xxsum0 }
); else if (xoffset == 0 && yoffset == 4)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
vp8_half_vert_variance16x_h_sse2(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 8,
&xsum1, &xxsum1);
}
else if (xoffset == 4 && yoffset == 4)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
&xsum0, &xxsum0);
vp8_filter_block2d_bil_var_sse2( vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr + 8, src_pixels_per_line, src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 8, dst_ptr + 8, dst_pixels_per_line, 8,
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], &xsum1, &xxsum1);
&xsum1, &xxsum1 }
); else
{
vp8_filter_block2d_bil_var_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
xoffset, yoffset,
&xsum0, &xxsum0);
vp8_filter_block2d_bil_var_sse2(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 8,
xoffset, yoffset,
&xsum1, &xxsum1);
}
xsum0 += xsum1; xsum0 += xsum1;
xxsum0 += xxsum1; xxsum0 += xxsum1;
@@ -428,12 +471,36 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
{ {
int xsum; int xsum;
unsigned int xxsum; unsigned int xxsum;
vp8_filter_block2d_bil_var_sse2(
src_ptr, src_pixels_per_line, if (xoffset == 4 && yoffset == 0)
dst_ptr, dst_pixels_per_line, 16, {
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], vp8_half_horiz_variance16x_h_sse2(
&xsum, &xxsum src_ptr, src_pixels_per_line,
); dst_ptr, dst_pixels_per_line, 16,
&xsum, &xxsum);
}
else if (xoffset == 0 && yoffset == 4)
{
vp8_half_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum, &xxsum);
}
else if (xoffset == 4 && yoffset == 4)
{
vp8_half_horiz_vert_variance16x_h_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
&xsum, &xxsum);
}
else
{
vp8_filter_block2d_bil_var_sse2(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
xoffset, yoffset,
&xsum, &xxsum);
}
*sse = xxsum; *sse = xxsum;
return (xxsum - ((xsum * xsum) >> 7)); return (xxsum - ((xsum * xsum) >> 7));