From e5f4e8eab9f0fe74003723d00ceb801d9618ccbb Mon Sep 17 00:00:00 2001 From: Yi Luo Date: Thu, 7 Apr 2016 15:06:43 -0700 Subject: [PATCH] Some cosmetic improvements since HBD variance 4x4 optimization Change-Id: I414c1fabd2e3a9b1d9daa8a90f85a0bace8bd3cd --- vp10/encoder/mcomp.c | 4 +- vpx_dsp/variance.c | 92 ++++++++++++++++-------------- vpx_dsp/variance.h | 4 +- vpx_dsp/x86/highbd_variance_sse4.c | 72 +++++++++++++---------- 4 files changed, 95 insertions(+), 77 deletions(-) diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c index 0c8ec43eb..4327d974c 100644 --- a/vp10/encoder/mcomp.c +++ b/vp10/encoder/mcomp.c @@ -367,8 +367,8 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, if (second_pred != NULL) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]); - vpx_highbd_comp_avg_pred_c(comp_pred16, second_pred, w, h, y + offset, - y_stride); + vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, + y_stride); besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); } else { diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c index 90c8bed52..e6be1dd73 100644 --- a/vpx_dsp/variance.c +++ b/vpx_dsp/variance.c @@ -433,7 +433,7 @@ uint32_t vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, \ return *sse; \ } -void highbd_var_filter_block2d_bil_first_pass( +void vpx_highbd_var_filter_block2d_bil_first_pass( const uint8_t *src_ptr8, uint16_t *output_ptr, unsigned int src_pixels_per_line, @@ -459,7 +459,7 @@ void highbd_var_filter_block2d_bil_first_pass( } } -void highbd_var_filter_block2d_bil_second_pass( +void vpx_highbd_var_filter_block2d_bil_second_pass( const uint16_t *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, @@ -492,13 +492,14 @@ uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ - dst_stride, sse); \ + dst_stride, sse); \ } \ \ uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \ @@ -509,10 +510,11 @@ uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ @@ -526,10 +528,11 @@ uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, sse); \ @@ -546,16 +549,17 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ \ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ - dst_stride, sse); \ + dst_stride, sse); \ } \ \ uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ @@ -568,10 +572,11 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ @@ -590,10 +595,11 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ - W, bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass( \ + src, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ CONVERT_TO_BYTEPTR(temp2), W); \ @@ -914,11 +920,11 @@ unsigned int vpx_highbd_masked_sub_pixel_variance##W##x##H##_c( \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ - H + 1, W, \ - bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ + H + 1, W, \ + bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, \ @@ -934,11 +940,11 @@ unsigned int vpx_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ - H + 1, W, \ - bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ + H + 1, W, \ + bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_10_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, \ @@ -954,11 +960,11 @@ unsigned int vpx_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ - H + 1, W, \ - bilinear_filters_2t[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters_2t[yoffset]); \ + vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \ + H + 1, W, \ + bilinear_filters_2t[xoffset]); \ + vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_12_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ W, dst, dst_stride, \ diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h index 4ad23f8ae..175985468 100644 --- a/vpx_dsp/variance.h +++ b/vpx_dsp/variance.h @@ -130,7 +130,7 @@ typedef struct vp10_variance_vtable { } vp10_variance_fn_ptr_t; #endif // CONFIG_VP10 -void highbd_var_filter_block2d_bil_first_pass( +void vpx_highbd_var_filter_block2d_bil_first_pass( const uint8_t *src_ptr8, uint16_t *output_ptr, unsigned int src_pixels_per_line, @@ -139,7 +139,7 @@ void highbd_var_filter_block2d_bil_first_pass( unsigned int output_width, const uint8_t *filter); -void highbd_var_filter_block2d_bil_second_pass( +void vpx_highbd_var_filter_block2d_bil_second_pass( const uint16_t *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, diff --git a/vpx_dsp/x86/highbd_variance_sse4.c b/vpx_dsp/x86/highbd_variance_sse4.c index 18ecc7efd..5c1dfe4dc 100644 --- a/vpx_dsp/x86/highbd_variance_sse4.c +++ b/vpx_dsp/x86/highbd_variance_sse4.c @@ -119,10 +119,12 @@ uint32_t vpx_highbd_8_sub_pixel_variance4x4_sse4_1( uint16_t fdata3[(4 + 1) * 4]; uint16_t temp2[4 * 4]; - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1, - 4, bilinear_filters_2t[xoffset]); - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, - bilinear_filters_2t[yoffset]); + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); return vpx_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp2), 4, dst, dst_stride, sse); @@ -137,10 +139,12 @@ uint32_t vpx_highbd_10_sub_pixel_variance4x4_sse4_1( uint16_t fdata3[(4 + 1) * 4]; uint16_t temp2[4 * 4]; - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1, - 4, bilinear_filters_2t[xoffset]); - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, - bilinear_filters_2t[yoffset]); + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp2), 4, dst, dst_stride, sse); @@ -155,10 +159,12 @@ uint32_t vpx_highbd_12_sub_pixel_variance4x4_sse4_1( uint16_t fdata3[(4 + 1) * 4]; uint16_t temp2[4 * 4]; - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1, - 4, bilinear_filters_2t[xoffset]); - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, - bilinear_filters_2t[yoffset]); + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp2), 4, dst, dst_stride, sse); @@ -177,13 +183,15 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4_sse4_1( uint16_t temp2[4 * 4]; DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]); - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1, - 4, bilinear_filters_2t[xoffset]); - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, - bilinear_filters_2t[yoffset]); + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); - vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4, - CONVERT_TO_BYTEPTR(temp2), 4); + vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4, + CONVERT_TO_BYTEPTR(temp2), 4); return vpx_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp3), 4, dst, dst_stride, sse); @@ -200,13 +208,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4_sse4_1( uint16_t temp2[4 * 4]; DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]); - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1, - 4, bilinear_filters_2t[xoffset]); - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, - bilinear_filters_2t[yoffset]); + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); - vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4, - CONVERT_TO_BYTEPTR(temp2), 4); + vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4, + CONVERT_TO_BYTEPTR(temp2), 4); return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp3), 4, dst, dst_stride, sse); @@ -223,13 +233,15 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4_sse4_1( uint16_t temp2[4 * 4]; DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]); - highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1, - 4, bilinear_filters_2t[xoffset]); - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, - bilinear_filters_2t[yoffset]); + vpx_highbd_var_filter_block2d_bil_first_pass( + src, fdata3, src_stride, 1, 4 + 1, + 4, bilinear_filters_2t[xoffset]); + vpx_highbd_var_filter_block2d_bil_second_pass( + fdata3, temp2, 4, 4, 4, 4, + bilinear_filters_2t[yoffset]); - vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4, - CONVERT_TO_BYTEPTR(temp2), 4); + vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4, + CONVERT_TO_BYTEPTR(temp2), 4); return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp3), 4, dst, dst_stride, sse);