Merge "fixed an overflow in ssim calculation"

This commit is contained in:
Yaowu Xu 2011-04-18 07:44:33 -07:00 committed by Code Review
commit c619f6cb0f
2 changed files with 65 additions and 38 deletions

View File

@@ -290,8 +290,8 @@ void ssim_parms_8x8_c
} }
} }
const static long long c1 = 426148; // (256^2*(.01*255)^2 const static long long cc1 = 26634; // (64^2*(.01*255)^2
const static long long c2 = 3835331; //(256^2*(.03*255)^2 const static long long cc2 = 239708; // (64^2*(.03*255)^2
static double similarity static double similarity
( (
@@ -303,10 +303,19 @@ static double similarity
int count int count
) )
{ {
long long ssim_n = (2*sum_s*sum_r+ c1)*(2*count*sum_sxr-2*sum_s*sum_r+c2); long long ssim_n, ssim_d;
long long c1, c2;
long long ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)* //scale the constants by number of pixels
(count*sum_sq_s-sum_s*sum_s + count*sum_sq_r-sum_r*sum_r +c2) ; c1 = (cc1*count*count)>>12;
c2 = (cc2*count*count)>>12;
ssim_n = (2*sum_s*sum_r+ c1)*((long long) 2*count*sum_sxr-
(long long) 2*sum_s*sum_r+c2);
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
((long long)count*sum_sq_s-(long long)sum_s*sum_s +
(long long)count*sum_sq_r-(long long) sum_r*sum_r +c2) ;
return ssim_n * 1.0 / ssim_d; return ssim_n * 1.0 / ssim_d;
} }
@@ -332,18 +341,33 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
const vp8_variance_rtcd_vtable_t *rtcd) const vp8_variance_rtcd_vtable_t *rtcd)
{ {
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0; unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
double ssim3; long long ssim3;
long long ssim_n; long long ssim_n,ssim_n1,ssim_n2;
long long ssim_d; long long ssim_d,ssim_d1,ssim_d2;
long long ssim_t1,ssim_t2;
long long c1, c2;
// normalize by 256/64
c1 = cc1*16;
c2 = cc2*16;
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2); ssim_n1 = (2*sum_s*sum_r+ c1);
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)* ssim_n2 =((long long) 2*256*sum_sxr-(long long) 2*sum_s*sum_r+c2);
(256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
ssim3 = 256 * (ssim_d-ssim_n) / ssim_d; ssim_d1 =((long long)sum_s*sum_s +(long long)sum_r*sum_r+c1);
return (long)( 256*ssim3 * ssim3 );
ssim_d2 = (256 * (long long) sum_sq_s-(long long) sum_s*sum_s +
(long long) 256*sum_sq_r-(long long) sum_r*sum_r +c2) ;
ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
ssim3 = 256 *ssim_t1 * ssim_t2;
if(ssim3 <0 )
ssim3=0;
return (long)( ssim3 );
} }
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels // TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
// such that the window regions overlap block boundaries to penalize blocking // such that the window regions overlap block boundaries to penalize blocking
@@ -361,18 +385,20 @@ double vp8_ssim2
) )
{ {
int i,j; int i,j;
int samples =0;
double ssim_total=0; double ssim_total=0;
// we can sample points as frequently as we like start with 1 per 8x8 // we can sample points as frequently as we like start with 1 per 4x4
for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8) for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)
{ {
for(j=0; j < width; j+=8 ) for(j=0; j < width-8; j+=4 )
{ {
ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd); double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2, rtcd);
ssim_total += v;
samples++;
} }
} }
ssim_total /= (width/8 * height /8); ssim_total /= samples;
return ssim_total; return ssim_total;
} }
@@ -405,4 +431,4 @@ double vp8_calc_ssim
*weight = 1; *weight = 1;
return ssimv; return ssimv;
} }

View File

@@ -16,12 +16,12 @@
paddusw xmm14, xmm4 ; sum_r paddusw xmm14, xmm4 ; sum_r
movdqa xmm1, xmm3 movdqa xmm1, xmm3
pmaddwd xmm1, xmm1 pmaddwd xmm1, xmm1
paddq xmm13, xmm1 ; sum_sq_s paddd xmm13, xmm1 ; sum_sq_s
movdqa xmm2, xmm4 movdqa xmm2, xmm4
pmaddwd xmm2, xmm2 pmaddwd xmm2, xmm2
paddq xmm12, xmm2 ; sum_sq_r paddd xmm12, xmm2 ; sum_sq_r
pmaddwd xmm3, xmm4 pmaddwd xmm3, xmm4
paddq xmm11, xmm3 ; sum_sxr paddd xmm11, xmm3 ; sum_sxr
%endmacro %endmacro
; Sum across the register %1 starting with q words ; Sum across the register %1 starting with q words
@@ -66,6 +66,7 @@ sym(vp8_ssim_parms_16x16_sse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 9 SHADOW_ARGS_TO_STACK 9
SAVE_XMM
push rsi push rsi
push rdi push rdi
; end prolog ; end prolog
@@ -115,19 +116,20 @@ NextRow:
SUM_ACROSS_Q xmm11 SUM_ACROSS_Q xmm11
mov rdi,arg(4) mov rdi,arg(4)
movq [rdi], xmm15; movd [rdi], xmm15;
mov rdi,arg(5) mov rdi,arg(5)
movq [rdi], xmm14; movd [rdi], xmm14;
mov rdi,arg(6) mov rdi,arg(6)
movq [rdi], xmm13; movd [rdi], xmm13;
mov rdi,arg(7) mov rdi,arg(7)
movq [rdi], xmm12; movd [rdi], xmm12;
mov rdi,arg(8) mov rdi,arg(8)
movq [rdi], xmm11; movd [rdi], xmm11;
; begin epilog ; begin epilog
pop rdi pop rdi
pop rsi pop rsi
RESTORE_XMM
UNSHADOW_ARGS UNSHADOW_ARGS
pop rbp pop rbp
ret ret
@@ -154,6 +156,7 @@ sym(vp8_ssim_parms_8x8_sse3):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 9 SHADOW_ARGS_TO_STACK 9
SAVE_XMM
push rsi push rsi
push rdi push rdi
; end prolog ; end prolog
@@ -174,11 +177,8 @@ sym(vp8_ssim_parms_8x8_sse3):
NextRow2: NextRow2:
;grab source and reference pixels ;grab source and reference pixels
movq xmm5, [rsi] movq xmm3, [rsi]
movq xmm6, [rdi] movq xmm4, [rdi]
movdqa xmm3, xmm5
movdqa xmm4, xmm6
punpcklbw xmm3, xmm0 ; low_s punpcklbw xmm3, xmm0 ; low_s
punpcklbw xmm4, xmm0 ; low_r punpcklbw xmm4, xmm0 ; low_r
@@ -197,19 +197,20 @@ NextRow2:
SUM_ACROSS_Q xmm11 SUM_ACROSS_Q xmm11
mov rdi,arg(4) mov rdi,arg(4)
movq [rdi], xmm15; movd [rdi], xmm15;
mov rdi,arg(5) mov rdi,arg(5)
movq [rdi], xmm14; movd [rdi], xmm14;
mov rdi,arg(6) mov rdi,arg(6)
movq [rdi], xmm13; movd [rdi], xmm13;
mov rdi,arg(7) mov rdi,arg(7)
movq [rdi], xmm12; movd [rdi], xmm12;
mov rdi,arg(8) mov rdi,arg(8)
movq [rdi], xmm11; movd [rdi], xmm11;
; begin epilog ; begin epilog
pop rdi pop rdi
pop rsi pop rsi
RESTORE_XMM
UNSHADOW_ARGS UNSHADOW_ARGS
pop rbp pop rbp
ret ret