Merge "fixed an overflow in ssim calculation"

This commit is contained in:
Yaowu Xu 2011-04-18 07:44:33 -07:00 committed by Code Review
commit c619f6cb0f
2 changed files with 65 additions and 38 deletions

View File

@ -290,8 +290,8 @@ void ssim_parms_8x8_c
}
}
const static long long c1 = 426148; // (256^2*(.01*255)^2
const static long long c2 = 3835331; //(256^2*(.03*255)^2
const static long long cc1 = 26634; // (64^2*(.01*255)^2
const static long long cc2 = 239708; // (64^2*(.03*255)^2
static double similarity
(
@ -303,10 +303,19 @@ static double similarity
int count
)
{
long long ssim_n = (2*sum_s*sum_r+ c1)*(2*count*sum_sxr-2*sum_s*sum_r+c2);
long long ssim_n, ssim_d;
long long c1, c2;
long long ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
(count*sum_sq_s-sum_s*sum_s + count*sum_sq_r-sum_r*sum_r +c2) ;
//scale the constants by number of pixels
c1 = (cc1*count*count)>>12;
c2 = (cc2*count*count)>>12;
ssim_n = (2*sum_s*sum_r+ c1)*((long long) 2*count*sum_sxr-
(long long) 2*sum_s*sum_r+c2);
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
((long long)count*sum_sq_s-(long long)sum_s*sum_s +
(long long)count*sum_sq_r-(long long) sum_r*sum_r +c2) ;
return ssim_n * 1.0 / ssim_d;
}
@ -332,18 +341,33 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
const vp8_variance_rtcd_vtable_t *rtcd)
{
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
double ssim3;
long long ssim_n;
long long ssim_d;
long long ssim3;
long long ssim_n,ssim_n1,ssim_n2;
long long ssim_d,ssim_d1,ssim_d2;
long long ssim_t1,ssim_t2;
long long c1, c2;
// normalize by 256/64
c1 = cc1*16;
c2 = cc2*16;
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2);
ssim_n1 = (2*sum_s*sum_r+ c1);
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
(256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
ssim_n2 =((long long) 2*256*sum_sxr-(long long) 2*sum_s*sum_r+c2);
ssim3 = 256 * (ssim_d-ssim_n) / ssim_d;
return (long)( 256*ssim3 * ssim3 );
ssim_d1 =((long long)sum_s*sum_s +(long long)sum_r*sum_r+c1);
ssim_d2 = (256 * (long long) sum_sq_s-(long long) sum_s*sum_s +
(long long) 256*sum_sq_r-(long long) sum_r*sum_r +c2) ;
ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
ssim3 = 256 *ssim_t1 * ssim_t2;
if(ssim3 <0 )
ssim3=0;
return (long)( ssim3 );
}
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
// such that the window regions overlap block boundaries to penalize blocking
@ -361,18 +385,20 @@ double vp8_ssim2
)
{
int i,j;
int samples =0;
double ssim_total=0;
// we can sample points as frequently as we like start with 1 per 8x8
for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8)
// we can sample points as frequently as we like start with 1 per 4x4
for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)
{
for(j=0; j < width; j+=8 )
for(j=0; j < width-8; j+=4 )
{
ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd);
double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2, rtcd);
ssim_total += v;
samples++;
}
}
ssim_total /= (width/8 * height /8);
ssim_total /= samples;
return ssim_total;
}
@ -405,4 +431,4 @@ double vp8_calc_ssim
*weight = 1;
return ssimv;
}
}

View File

@ -16,12 +16,12 @@
paddusw xmm14, xmm4 ; sum_r
movdqa xmm1, xmm3
pmaddwd xmm1, xmm1
paddq xmm13, xmm1 ; sum_sq_s
paddd xmm13, xmm1 ; sum_sq_s
movdqa xmm2, xmm4
pmaddwd xmm2, xmm2
paddq xmm12, xmm2 ; sum_sq_r
paddd xmm12, xmm2 ; sum_sq_r
pmaddwd xmm3, xmm4
paddq xmm11, xmm3 ; sum_sxr
paddd xmm11, xmm3 ; sum_sxr
%endmacro
; Sum across the register %1 starting with q words
@ -66,6 +66,7 @@ sym(vp8_ssim_parms_16x16_sse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
SAVE_XMM
push rsi
push rdi
; end prolog
@ -115,19 +116,20 @@ NextRow:
SUM_ACROSS_Q xmm11
mov rdi,arg(4)
movq [rdi], xmm15;
movd [rdi], xmm15;
mov rdi,arg(5)
movq [rdi], xmm14;
movd [rdi], xmm14;
mov rdi,arg(6)
movq [rdi], xmm13;
movd [rdi], xmm13;
mov rdi,arg(7)
movq [rdi], xmm12;
movd [rdi], xmm12;
mov rdi,arg(8)
movq [rdi], xmm11;
movd [rdi], xmm11;
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -154,6 +156,7 @@ sym(vp8_ssim_parms_8x8_sse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
SAVE_XMM
push rsi
push rdi
; end prolog
@ -174,11 +177,8 @@ sym(vp8_ssim_parms_8x8_sse3):
NextRow2:
;grab source and reference pixels
movq xmm5, [rsi]
movq xmm6, [rdi]
movdqa xmm3, xmm5
movdqa xmm4, xmm6
movq xmm3, [rsi]
movq xmm4, [rdi]
punpcklbw xmm3, xmm0 ; low_s
punpcklbw xmm4, xmm0 ; low_r
@ -197,19 +197,20 @@ NextRow2:
SUM_ACROSS_Q xmm11
mov rdi,arg(4)
movq [rdi], xmm15;
movd [rdi], xmm15;
mov rdi,arg(5)
movq [rdi], xmm14;
movd [rdi], xmm14;
mov rdi,arg(6)
movq [rdi], xmm13;
movd [rdi], xmm13;
mov rdi,arg(7)
movq [rdi], xmm12;
movd [rdi], xmm12;
mov rdi,arg(8)
movq [rdi], xmm11;
movd [rdi], xmm11;
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret