diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl index d65ca604d..7924ae750 100644 --- a/vp8/common/rtcd_defs.pl +++ b/vp8/common/rtcd_defs.pl @@ -254,19 +254,6 @@ if ($opts{arch} =~ /x86/) { specialize qw/vp8_copy32xn sse2 sse3/; } -# -# Structured Similarity (SSIM) -# -if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { - $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2"; - - add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; - specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64"; - - add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; - specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64"; -} - # # Forward DCT # diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index c2a7ac4ce..d2fb05ab7 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -74,26 +74,7 @@ extern const int vp8_gf_interval_table[101]; #if CONFIG_INTERNAL_STATS #include "math.h" - -extern double vp8_calc_ssim -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - int lumamask, - double *weight -); - - -extern double vp8_calc_ssimg -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - double *ssim_y, - double *ssim_u, - double *ssim_v -); - - +#include "vpx_dsp/ssim.h" #endif @@ -5741,8 +5722,8 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cpi->total_sq_error2 += sq_error2; cpi->totalp += frame_psnr2; - frame_ssim2 = vp8_calc_ssim(cpi->Source, - &cm->post_proc_buffer, 1, &weight); + frame_ssim2 = vpx_calc_ssim(cpi->Source, + &cm->post_proc_buffer, &weight); cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; @@ -5772,7 +5753,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cpi->b_calculate_ssimg) { double y, u, v, frame_all; - frame_all = vp8_calc_ssimg(cpi->Source, cm->frame_to_show, + frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v); if (cpi->oxcf.number_of_layers > 1) diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c deleted file mode 100644 index e75160836..000000000 --- a/vp8/encoder/ssim.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "onyx_int.h" - -void vp8_ssim_parms_16x16_c -( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long *sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr -) -{ - int i,j; - for(i=0;i<16;i++,s+=sp,r+=rp) - { - for(j=0;j<16;j++) - { - *sum_s += s[j]; - *sum_r += r[j]; - *sum_sq_s += s[j] * s[j]; - *sum_sq_r += r[j] * r[j]; - *sum_sxr += s[j] * r[j]; - } - } -} -void vp8_ssim_parms_8x8_c -( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long *sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr -) -{ - int i,j; - for(i=0;i<8;i++,s+=sp,r+=rp) - { - for(j=0;j<8;j++) - { - *sum_s += s[j]; - *sum_r += r[j]; - *sum_sq_s += s[j] * s[j]; - *sum_sq_r += r[j] * r[j]; - *sum_sxr += s[j] * r[j]; - } - } -} - -const static int64_t cc1 = 26634; // (64^2*(.01*255)^2 -const static int64_t cc2 = 239708; // (64^2*(.03*255)^2 - -static double similarity -( - unsigned long sum_s, - unsigned long sum_r, - unsigned long sum_sq_s, - unsigned long sum_sq_r, - unsigned long sum_sxr, - int count -) -{ - int64_t ssim_n, ssim_d; - int64_t c1, c2; - - //scale the constants by number of pixels - c1 = (cc1*count*count)>>12; - c2 = (cc2*count*count)>>12; - - ssim_n = (2*sum_s*sum_r+ c1)*((int64_t) 2*count*sum_sxr- - (int64_t) 2*sum_s*sum_r+c2); - - ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)* - ((int64_t)count*sum_sq_s-(int64_t)sum_s*sum_s + - (int64_t)count*sum_sq_r-(int64_t) sum_r*sum_r +c2) ; - - return ssim_n * 1.0 / ssim_d; -} - -static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp) -{ - unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0; - vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); - return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); -} -static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp) -{ - unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0; - vp8_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); - return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64); -} - -// TODO: (jbb) tried to scale this function such that we may be able to use it -// for distortion metric in mode selection code ( provided we do a reconstruction) -long dssim(unsigned char *s,int sp, unsigned char *r,int rp) -{ - unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0; - int64_t ssim3; - int64_t ssim_n1,ssim_n2; - int64_t ssim_d1,ssim_d2; - int64_t ssim_t1,ssim_t2; - int64_t c1, c2; - - // normalize by 256/64 - c1 = cc1*16; - c2 = cc2*16; - - vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); - ssim_n1 = (2*sum_s*sum_r+ c1); - - ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2); - - ssim_d1 =((int64_t)sum_s*sum_s +(int64_t)sum_r*sum_r+c1); - - ssim_d2 = (256 * (int64_t) sum_sq_s-(int64_t) sum_s*sum_s + - (int64_t) 256*sum_sq_r-(int64_t) sum_r*sum_r +c2) ; - - ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1; - ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2; - - ssim3 = 256 *ssim_t1 * ssim_t2; - if(ssim3 <0 ) - ssim3=0; - return (long)( ssim3 ); -} - -// We are using a 8x8 moving window with starting location of each 8x8 window -// on the 4x4 pixel grid. Such arrangement allows the windows to overlap -// block boundaries to penalize blocking artifacts. -double vp8_ssim2 -( - unsigned char *img1, - unsigned char *img2, - int stride_img1, - int stride_img2, - int width, - int height -) -{ - int i,j; - int samples =0; - double ssim_total=0; - - // sample point start with each 4x4 location - for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4) - { - for(j=0; j < width-8; j+=4 ) - { - double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2); - ssim_total += v; - samples++; - } - } - ssim_total /= samples; - return ssim_total; -} -double vp8_calc_ssim -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - int lumamask, - double *weight -) -{ - double a, b, c; - double ssimv; - - a = vp8_ssim2(source->y_buffer, dest->y_buffer, - source->y_stride, dest->y_stride, source->y_width, - source->y_height); - - b = vp8_ssim2(source->u_buffer, dest->u_buffer, - source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height); - - c = vp8_ssim2(source->v_buffer, dest->v_buffer, - source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height); - - ssimv = a * .8 + .1 * (b + c); - - *weight = 1; - - return ssimv; -} - -double vp8_calc_ssimg -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - double *ssim_y, - double *ssim_u, - double *ssim_v -) -{ - double ssim_all = 0; - double a, b, c; - - a = vp8_ssim2(source->y_buffer, dest->y_buffer, - source->y_stride, dest->y_stride, source->y_width, - source->y_height); - - b = vp8_ssim2(source->u_buffer, dest->u_buffer, - source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height); - - c = vp8_ssim2(source->v_buffer, dest->v_buffer, - source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height); - *ssim_y = a; - *ssim_u = b; - *ssim_v = c; - ssim_all = (a * 4 + b + c) /6; - - return ssim_all; -} diff --git a/vp8/encoder/x86/ssim_opt_x86_64.asm b/vp8/encoder/x86/ssim_opt_x86_64.asm deleted file mode 100644 index 5964a85f2..000000000 --- a/vp8/encoder/x86/ssim_opt_x86_64.asm +++ /dev/null @@ -1,216 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr -%macro TABULATE_SSIM 0 - paddusw xmm15, xmm3 ; sum_s - paddusw xmm14, xmm4 ; sum_r - movdqa xmm1, xmm3 - pmaddwd xmm1, xmm1 - paddd xmm13, xmm1 ; sum_sq_s - movdqa xmm2, xmm4 - pmaddwd xmm2, xmm2 - paddd xmm12, xmm2 ; sum_sq_r - pmaddwd xmm3, xmm4 - paddd xmm11, xmm3 ; sum_sxr -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_Q 1 - movdqa xmm2,%1 - punpckldq %1,xmm0 - punpckhdq xmm2,xmm0 - paddq %1,xmm2 - movdqa xmm2,%1 - punpcklqdq %1,xmm0 - punpckhqdq xmm2,xmm0 - paddq %1,xmm2 -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_W 1 - movdqa xmm1, %1 - punpcklwd %1,xmm0 - punpckhwd xmm1,xmm0 - paddd %1, xmm1 - SUM_ACROSS_Q %1 -%endmacro -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. -global sym(vp8_ssim_parms_16x16_sse2) PRIVATE -sym(vp8_ssim_parms_16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 16 ;row counter -.NextRow: - - ;grab source and reference pixels - movdqu xmm5, [rsi] - movdqu xmm6, [rdi] - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpckhbw xmm3, xmm0 ; high_s - punpckhbw xmm4, xmm0 ; high_r - - TABULATE_SSIM - - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. -global sym(vp8_ssim_parms_8x8_sse2) PRIVATE -sym(vp8_ssim_parms_8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 8 ;row counter -.NextRow: - - ;grab source and reference pixels - movq xmm3, [rsi] - movq xmm4, [rdi] - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 59f9fe839..ea7d472ae 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -65,7 +65,6 @@ VP8_CX_SRCS-yes += encoder/ratectrl.c VP8_CX_SRCS-yes += encoder/rdopt.c VP8_CX_SRCS-yes += encoder/segmentation.c VP8_CX_SRCS-yes += encoder/segmentation.h -VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/ssim.c VP8_CX_SRCS-yes += encoder/tokenize.c VP8_CX_SRCS-yes += encoder/dct_value_cost.h VP8_CX_SRCS-yes += encoder/dct_value_tokens.h @@ -97,7 +96,6 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm -VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt_x86_64.asm ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm