Merge "fixed an overflow in ssim calculation"
This commit is contained in:
commit
c619f6cb0f
@ -290,8 +290,8 @@ void ssim_parms_8x8_c
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const static long long c1 = 426148; // 256^2 * (.01*255)^2
|
const static long long cc1 = 26634; // 64^2 * (.01*255)^2
|
||||||
const static long long c2 = 3835331; // 256^2 * (.03*255)^2
|
const static long long cc2 = 239708; // 64^2 * (.03*255)^2
|
||||||
|
|
||||||
static double similarity
|
static double similarity
|
||||||
(
|
(
|
||||||
@ -303,10 +303,19 @@ static double similarity
|
|||||||
int count
|
int count
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
long long ssim_n = (2*sum_s*sum_r+ c1)*(2*count*sum_sxr-2*sum_s*sum_r+c2);
|
long long ssim_n, ssim_d;
|
||||||
|
long long c1, c2;
|
||||||
|
|
||||||
long long ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
|
//scale the constants by number of pixels
|
||||||
(count*sum_sq_s-sum_s*sum_s + count*sum_sq_r-sum_r*sum_r +c2) ;
|
c1 = (cc1*count*count)>>12;
|
||||||
|
c2 = (cc2*count*count)>>12;
|
||||||
|
|
||||||
|
ssim_n = (2*sum_s*sum_r+ c1)*((long long) 2*count*sum_sxr-
|
||||||
|
(long long) 2*sum_s*sum_r+c2);
|
||||||
|
|
||||||
|
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
|
||||||
|
((long long)count*sum_sq_s-(long long)sum_s*sum_s +
|
||||||
|
(long long)count*sum_sq_r-(long long) sum_r*sum_r +c2) ;
|
||||||
|
|
||||||
return ssim_n * 1.0 / ssim_d;
|
return ssim_n * 1.0 / ssim_d;
|
||||||
}
|
}
|
||||||
@ -332,18 +341,33 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
|
|||||||
const vp8_variance_rtcd_vtable_t *rtcd)
|
const vp8_variance_rtcd_vtable_t *rtcd)
|
||||||
{
|
{
|
||||||
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
|
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
|
||||||
double ssim3;
|
long long ssim3;
|
||||||
long long ssim_n;
|
long long ssim_n,ssim_n1,ssim_n2;
|
||||||
long long ssim_d;
|
long long ssim_d,ssim_d1,ssim_d2;
|
||||||
|
long long ssim_t1,ssim_t2;
|
||||||
|
long long c1, c2;
|
||||||
|
|
||||||
|
// scale the 64-pixel constants up to 256 pixels: (256/64)^2 = 16
|
||||||
|
c1 = cc1*16;
|
||||||
|
c2 = cc2*16;
|
||||||
|
|
||||||
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
|
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
|
||||||
ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2);
|
ssim_n1 = (2*sum_s*sum_r+ c1);
|
||||||
|
|
||||||
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
|
ssim_n2 =((long long) 2*256*sum_sxr-(long long) 2*sum_s*sum_r+c2);
|
||||||
(256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
|
|
||||||
|
|
||||||
ssim3 = 256 * (ssim_d-ssim_n) / ssim_d;
|
ssim_d1 =((long long)sum_s*sum_s +(long long)sum_r*sum_r+c1);
|
||||||
return (long)( 256*ssim3 * ssim3 );
|
|
||||||
|
ssim_d2 = (256 * (long long) sum_sq_s-(long long) sum_s*sum_s +
|
||||||
|
(long long) 256*sum_sq_r-(long long) sum_r*sum_r +c2) ;
|
||||||
|
|
||||||
|
ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
|
||||||
|
ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
|
||||||
|
|
||||||
|
ssim3 = 256 *ssim_t1 * ssim_t2;
|
||||||
|
if(ssim3 <0 )
|
||||||
|
ssim3=0;
|
||||||
|
return (long)( ssim3 );
|
||||||
}
|
}
|
||||||
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
|
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
|
||||||
// such that the window regions overlap block boundaries to penalize blocking
|
// such that the window regions overlap block boundaries to penalize blocking
|
||||||
@ -361,18 +385,20 @@ double vp8_ssim2
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
int i,j;
|
int i,j;
|
||||||
|
int samples =0;
|
||||||
double ssim_total=0;
|
double ssim_total=0;
|
||||||
|
|
||||||
// we can sample points as frequently as we like start with 1 per 8x8
|
// we can sample points as frequently as we like; start with 1 per 4x4
|
||||||
for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8)
|
for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)
|
||||||
{
|
{
|
||||||
for(j=0; j < width; j+=8 )
|
for(j=0; j < width-8; j+=4 )
|
||||||
{
|
{
|
||||||
ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd);
|
double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2, rtcd);
|
||||||
|
ssim_total += v;
|
||||||
|
samples++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ssim_total /= (width/8 * height /8);
|
ssim_total /= samples;
|
||||||
return ssim_total;
|
return ssim_total;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -405,4 +431,4 @@ double vp8_calc_ssim
|
|||||||
*weight = 1;
|
*weight = 1;
|
||||||
|
|
||||||
return ssimv;
|
return ssimv;
|
||||||
}
|
}
|
@ -16,12 +16,12 @@
|
|||||||
paddusw xmm14, xmm4 ; sum_r
|
paddusw xmm14, xmm4 ; sum_r
|
||||||
movdqa xmm1, xmm3
|
movdqa xmm1, xmm3
|
||||||
pmaddwd xmm1, xmm1
|
pmaddwd xmm1, xmm1
|
||||||
paddq xmm13, xmm1 ; sum_sq_s
|
paddd xmm13, xmm1 ; sum_sq_s
|
||||||
movdqa xmm2, xmm4
|
movdqa xmm2, xmm4
|
||||||
pmaddwd xmm2, xmm2
|
pmaddwd xmm2, xmm2
|
||||||
paddq xmm12, xmm2 ; sum_sq_r
|
paddd xmm12, xmm2 ; sum_sq_r
|
||||||
pmaddwd xmm3, xmm4
|
pmaddwd xmm3, xmm4
|
||||||
paddq xmm11, xmm3 ; sum_sxr
|
paddd xmm11, xmm3 ; sum_sxr
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
; Sum across the register %1 starting with q words
|
; Sum across the register %1 starting with q words
|
||||||
@ -66,6 +66,7 @@ sym(vp8_ssim_parms_16x16_sse3):
|
|||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 9
|
SHADOW_ARGS_TO_STACK 9
|
||||||
|
SAVE_XMM
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
; end prolog
|
; end prolog
|
||||||
@ -115,19 +116,20 @@ NextRow:
|
|||||||
SUM_ACROSS_Q xmm11
|
SUM_ACROSS_Q xmm11
|
||||||
|
|
||||||
mov rdi,arg(4)
|
mov rdi,arg(4)
|
||||||
movq [rdi], xmm15;
|
movd [rdi], xmm15;
|
||||||
mov rdi,arg(5)
|
mov rdi,arg(5)
|
||||||
movq [rdi], xmm14;
|
movd [rdi], xmm14;
|
||||||
mov rdi,arg(6)
|
mov rdi,arg(6)
|
||||||
movq [rdi], xmm13;
|
movd [rdi], xmm13;
|
||||||
mov rdi,arg(7)
|
mov rdi,arg(7)
|
||||||
movq [rdi], xmm12;
|
movd [rdi], xmm12;
|
||||||
mov rdi,arg(8)
|
mov rdi,arg(8)
|
||||||
movq [rdi], xmm11;
|
movd [rdi], xmm11;
|
||||||
|
|
||||||
; begin epilog
|
; begin epilog
|
||||||
pop rdi
|
pop rdi
|
||||||
pop rsi
|
pop rsi
|
||||||
|
RESTORE_XMM
|
||||||
UNSHADOW_ARGS
|
UNSHADOW_ARGS
|
||||||
pop rbp
|
pop rbp
|
||||||
ret
|
ret
|
||||||
@ -154,6 +156,7 @@ sym(vp8_ssim_parms_8x8_sse3):
|
|||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 9
|
SHADOW_ARGS_TO_STACK 9
|
||||||
|
SAVE_XMM
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
; end prolog
|
; end prolog
|
||||||
@ -174,11 +177,8 @@ sym(vp8_ssim_parms_8x8_sse3):
|
|||||||
NextRow2:
|
NextRow2:
|
||||||
|
|
||||||
;grab source and reference pixels
|
;grab source and reference pixels
|
||||||
movq xmm5, [rsi]
|
movq xmm3, [rsi]
|
||||||
movq xmm6, [rdi]
|
movq xmm4, [rdi]
|
||||||
|
|
||||||
movdqa xmm3, xmm5
|
|
||||||
movdqa xmm4, xmm6
|
|
||||||
punpcklbw xmm3, xmm0 ; low_s
|
punpcklbw xmm3, xmm0 ; low_s
|
||||||
punpcklbw xmm4, xmm0 ; low_r
|
punpcklbw xmm4, xmm0 ; low_r
|
||||||
|
|
||||||
@ -197,19 +197,20 @@ NextRow2:
|
|||||||
SUM_ACROSS_Q xmm11
|
SUM_ACROSS_Q xmm11
|
||||||
|
|
||||||
mov rdi,arg(4)
|
mov rdi,arg(4)
|
||||||
movq [rdi], xmm15;
|
movd [rdi], xmm15;
|
||||||
mov rdi,arg(5)
|
mov rdi,arg(5)
|
||||||
movq [rdi], xmm14;
|
movd [rdi], xmm14;
|
||||||
mov rdi,arg(6)
|
mov rdi,arg(6)
|
||||||
movq [rdi], xmm13;
|
movd [rdi], xmm13;
|
||||||
mov rdi,arg(7)
|
mov rdi,arg(7)
|
||||||
movq [rdi], xmm12;
|
movd [rdi], xmm12;
|
||||||
mov rdi,arg(8)
|
mov rdi,arg(8)
|
||||||
movq [rdi], xmm11;
|
movd [rdi], xmm11;
|
||||||
|
|
||||||
; begin epilog
|
; begin epilog
|
||||||
pop rdi
|
pop rdi
|
||||||
pop rsi
|
pop rsi
|
||||||
|
RESTORE_XMM
|
||||||
UNSHADOW_ARGS
|
UNSHADOW_ARGS
|
||||||
pop rbp
|
pop rbp
|
||||||
ret
|
ret
|
||||||
|
Loading…
x
Reference in New Issue
Block a user