From 5abafcc38115eab70d77109d26be151205fd2172 Mon Sep 17 00:00:00 2001
From: Yunqing Wang
Date: Wed, 27 Apr 2011 13:40:39 -0400
Subject: [PATCH 1/2] Use insertion sort instead of quick sort

Insertion sort performs better for sorting small arrays. In real-time
encoding (speed=-5), a test on the test set showed a 1.7% performance
gain with 0% average PSNR change.

Change-Id: Ie02eaa6fed662866a937299194c590d41b25bc3d
---
 vp8/encoder/rdopt.c | 119 ++++++++++++++++----------------------------
 1 file changed, 44 insertions(+), 75 deletions(-)

diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index e99d6f0d1..d4294648c 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1430,86 +1430,55 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
     return bsi.segment_rd;
 }
 
-static void swap(int *x,int *y)
+static void insertsortmv(int arr[], int len)
 {
-    int tmp;
+    int i, j, k;
 
-    tmp = *x;
-    *x = *y;
-    *y = tmp;
+    for ( i = 1 ; i <= len-1 ; i++ )
+    {
+        for ( j = 0 ; j < i ; j++ )
+        {
+            if ( arr[j] > arr[i] )
+            {
+                int temp;
+
+                temp = arr[i];
+
+                for ( k = i; k >j; k--)
+                    arr[k] = arr[k - 1] ;
+
+                arr[j] = temp ;
+            }
+        }
+    }
 }
 
-static void quicksortmv(int arr[],int left, int right)
+static void insertsortsad(int arr[],int idx[], int len)
 {
-    int lidx,ridx,pivot;
+    int i, j, k;
 
-    lidx = left;
-    ridx = right;
+    for ( i = 1 ; i <= len-1 ; i++ )
+    {
+        for ( j = 0 ; j < i ; j++ )
+        {
+            if ( arr[j] > arr[i] )
+            {
+                int temp, tempi;
 
-    if( left < right)
-    {
-        pivot = (left + right)/2;
+                temp = arr[i];
+                tempi = idx[i];
 
-        while(lidx <=pivot && ridx >=pivot)
-        {
-            while(arr[lidx] < arr[pivot] && lidx <= pivot)
-                lidx++;
-            while(arr[ridx] > arr[pivot] && ridx >= pivot)
-                ridx--;
-            swap(&arr[lidx], &arr[ridx]);
-            lidx++;
-            ridx--;
-            if(lidx-1 == pivot)
-            {
-                ridx++;
-                pivot = ridx;
-            }
-            else if(ridx+1 == pivot)
-            {
-                lidx--;
-                pivot = lidx;
-            }
-        }
-        quicksortmv(arr, left, pivot - 1);
-        quicksortmv(arr, pivot + 1, right);
-    }
-}
+                for ( k = i; k >j; k--)
+                {
+                    arr[k] = arr[k - 1] ;
+                    idx[k] = idx[k - 1];
+                }
 
-static void quicksortsad(int arr[],int idx[], int left, int right)
-{
-    int lidx,ridx,pivot;
-
-    lidx = left;
-    ridx = right;
-
-    if( left < right)
-    {
-        pivot = (left + right)/2;
-
-        while(lidx <=pivot && ridx >=pivot)
-        {
-            while(arr[lidx] < arr[pivot] && lidx <= pivot)
-                lidx++;
-            while(arr[ridx] > arr[pivot] && ridx >= pivot)
-                ridx--;
-            swap(&arr[lidx], &arr[ridx]);
-            swap(&idx[lidx], &idx[ridx]);
-            lidx++;
-            ridx--;
-            if(lidx-1 == pivot)
-            {
-                ridx++;
-                pivot = ridx;
-            }
-            else if(ridx+1 == pivot)
-            {
-                lidx--;
-                pivot = lidx;
-            }
-        }
-        quicksortsad(arr, idx, left, pivot - 1);
-        quicksortsad(arr, idx, pivot + 1, right);
-    }
+                arr[j] = temp ;
+                idx[j] = tempi;
+            }
+        }
+    }
 }
 
 //The improved MV prediction
@@ -1645,8 +1614,8 @@ void vp8_mv_pred
             mvy[i] = near_mvs[i].as_mv.col;
         }
 
-        quicksortmv (mvx, 0, vcnt-1);
-        quicksortmv (mvy, 0, vcnt-1);
+        insertsortmv(mvx, vcnt);
+        insertsortmv(mvy, vcnt);
         mv.as_mv.row = mvx[vcnt/2];
         mv.as_mv.col = mvy[vcnt/2];
 
@@ -1709,10 +1678,10 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
 
         if(cpi->common.last_frame_type != KEY_FRAME)
         {
-            quicksortsad(near_sad, near_sadidx, 0, 7);
+            insertsortsad(near_sad, near_sadidx, 8);
         }else
         {
-            quicksortsad(near_sad, near_sadidx, 0, 2);
+            insertsortsad(near_sad, near_sadidx, 3);
         }
     }
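
Note: both replacement routines are plain insertion sorts: walk the
array left to right, and for each element find the first earlier slot
holding a larger value, shift the intervening elements one place right,
and drop the element in. insertsortsad() mirrors every shift into a
parallel index array, so near_sadidx[] still maps each sorted SAD value
back to the candidate it came from. For arrays of at most 8 entries
this avoids quicksort's recursion and pivot bookkeeping, which is where
the reported gain comes from. Below is a minimal standalone sketch of
the paired sort; the algorithm is taken from the patch, but the test
harness and its SAD values are invented for illustration.

#include <stdio.h>

/* Insertion sort on arr[], mirroring every shift into the parallel
 * index array idx[], so idx[] keeps naming each value's original slot. */
static void insertsortsad(int arr[], int idx[], int len)
{
    int i, j, k;

    for (i = 1; i <= len - 1; i++)
    {
        for (j = 0; j < i; j++)
        {
            if (arr[j] > arr[i])   /* arr[i] belongs at position j */
            {
                int temp = arr[i];
                int tempi = idx[i];

                /* shift arr[j..i-1] and idx[j..i-1] one slot right */
                for (k = i; k > j; k--)
                {
                    arr[k] = arr[k - 1];
                    idx[k] = idx[k - 1];
                }

                arr[j] = temp;
                idx[j] = tempi;
            }
        }
    }
}

int main(void)
{
    /* made-up SAD values for 8 candidate predictors */
    int near_sad[8]    = { 900, 120, 560, 130, 40, 770, 300, 650 };
    int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
    int i;

    insertsortsad(near_sad, near_sadidx, 8);

    for (i = 0; i < 8; i++)
        printf("sad %3d from candidate %d\n", near_sad[i], near_sadidx[i]);

    return 0;
}
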
From ccd6f7ed77c1b28fc42302e9c34972bc289163fe Mon Sep 17 00:00:00 2001
From: Scott LaVarnway
Date: Thu, 28 Apr 2011 10:53:59 -0400
Subject: [PATCH 2/2] Consolidated build inter predictors

Code cleanup.

Change-Id: Ic8b0167851116c64ddf08e8a3d302fb09ab61146
---
 vp8/common/reconinter.c   | 239 +++++++++++++-------------------------
 vp8/common/reconinter.h   |   8 +-
 vp8/decoder/decodframe.c  |   8 +-
 vp8/decoder/threading.c   |   6 +-
 vp8/encoder/encodeframe.c |   5 +-
 5 files changed, 104 insertions(+), 162 deletions(-)

diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 80b17acb6..3b0405ca1 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -279,100 +279,111 @@ void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
     }
 }
 
-void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
+void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
+                                        unsigned char *dst_y,
+                                        unsigned char *dst_u,
+                                        unsigned char *dst_v,
+                                        int dst_ystride,
+                                        int dst_uvstride)
 {
+    int offset;
+    unsigned char *ptr;
+    unsigned char *uptr, *vptr;
 
-    if (x->mode_info_context->mbmi.mode != SPLITMV)
+    int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+    int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+
+    unsigned char *ptr_base = x->pre.y_buffer;
+    int pre_stride = x->block[0].pre_stride;
+
+    ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+
+    if ((mv_row | mv_col) & 7)
     {
-        int offset;
-        unsigned char *ptr_base;
-        unsigned char *ptr;
-        unsigned char *uptr, *vptr;
-        unsigned char *pred_ptr = x->predictor;
-        unsigned char *upred_ptr = &x->predictor[256];
-        unsigned char *vpred_ptr = &x->predictor[320];
+        x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride);
+    }
+    else
+    {
+        RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride);
+    }
 
-        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
-        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
-        int pre_stride = x->block[0].pre_stride;
+    mv_row = x->block[16].bmi.mv.as_mv.row;
+    mv_col = x->block[16].bmi.mv.as_mv.col;
+    pre_stride >>= 1;
+    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
+    uptr = x->pre.u_buffer + offset;
+    vptr = x->pre.v_buffer + offset;
 
-        ptr_base = x->pre.y_buffer;
-        ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+    if ((mv_row | mv_col) & 7)
+    {
+        x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride);
+        x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride);
+    }
+    else
+    {
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, dst_u, dst_uvstride);
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, dst_v, dst_uvstride);
+    }
 
-        if ((mv_row | mv_col) & 7)
+}
+
+void vp8_build_inter4x4_predictors_mb(MACROBLOCKD *x)
+{
+    int i;
+
+    if (x->mode_info_context->mbmi.partitioning < 3)
+    {
+        for (i = 0; i < 4; i++)
         {
-            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
-        }
-        else
-        {
-            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
-        }
-
-        mv_row = x->block[16].bmi.mv.as_mv.row;
-        mv_col = x->block[16].bmi.mv.as_mv.col;
-        pre_stride >>= 1;
-        offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
-        uptr = x->pre.u_buffer + offset;
-        vptr = x->pre.v_buffer + offset;
-
-        if ((mv_row | mv_col) & 7)
-        {
-            x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
-            x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
-        }
-        else
-        {
-            RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
-            RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vpred_ptr, 8);
+            BLOCKD *d = &x->block[bbb[i]];
+            build_inter_predictors4b(x, d, 16);
         }
     }
     else
     {
-        int i;
-
-        if (x->mode_info_context->mbmi.partitioning < 3)
-        {
-            for (i = 0; i < 4; i++)
-            {
-                BLOCKD *d = &x->block[bbb[i]];
-                build_inter_predictors4b(x, d, 16);
-            }
-        }
-        else
-        {
-            for (i = 0; i < 16; i += 2)
-            {
-                BLOCKD *d0 = &x->block[i];
-                BLOCKD *d1 = &x->block[i+1];
-
-                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-                    build_inter_predictors2b(x, d0, 16);
-                else
-                {
-                    vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
-                    vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
-                }
-
-            }
-
-        }
-
-        for (i = 16; i < 24; i += 2)
+        for (i = 0; i < 16; i += 2)
         {
             BLOCKD *d0 = &x->block[i];
             BLOCKD *d1 = &x->block[i+1];
 
             if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-                build_inter_predictors2b(x, d0, 8);
+                build_inter_predictors2b(x, d0, 16);
             else
             {
-                vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
-                vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
+                vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
+                vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
             }
 
         }
 
     }
+
+    for (i = 16; i < 24; i += 2)
+    {
+        BLOCKD *d0 = &x->block[i];
+        BLOCKD *d1 = &x->block[i+1];
+
+        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
+            build_inter_predictors2b(x, d0, 8);
+        else
+        {
+            vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
+            vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
+        }
+    }
+}
+
+void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
+{
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
+    {
+        vp8_build_inter16x16_predictors_mb(x, x->predictor, &x->predictor[256],
+                                           &x->predictor[320], 16, 8);
+    }
+    else
+    {
+        vp8_build_inter4x4_predictors_mb(x);
+    }
 }
 
 void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
@@ -455,91 +466,5 @@
     }
 }
 
-
-/* The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
- * situation, we can write the result directly to dst buffer instead of writing it to predictor
- * buffer and then copying it to dst buffer.
- */
-static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf)
-{
-    int r;
-    unsigned char *ptr_base;
-    unsigned char *ptr;
-    /*unsigned char *pred_ptr = d->predictor;*/
-    int dst_stride = d->dst_stride;
-    int pre_stride = d->pre_stride;
-
-    ptr_base = *(d->base_pre);
-
-    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
-    {
-        ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
-        sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, dst_stride);
-    }
-    else
-    {
-        ptr_base += d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
-        ptr = ptr_base;
-
-        for (r = 0; r < 4; r++)
-        {
-#ifdef MUST_BE_ALIGNED
-            dst_ptr[0] = ptr[0];
-            dst_ptr[1] = ptr[1];
-            dst_ptr[2] = ptr[2];
-            dst_ptr[3] = ptr[3];
-#else
-            *(int *)dst_ptr = *(int *)ptr ;
-#endif
-            dst_ptr += dst_stride;
-            ptr += pre_stride;
-        }
-    }
-}
-
-void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x)
-{
-    unsigned char *dst_ptr = x->dst.y_buffer;
-
-    int offset;
-    unsigned char *ptr_base;
-    unsigned char *ptr;
-    unsigned char *uptr, *vptr;
-    unsigned char *udst_ptr = x->dst.u_buffer;
-    unsigned char *vdst_ptr = x->dst.v_buffer;
-
-    int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
-    int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
-    int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
-
-    ptr_base = x->pre.y_buffer;
-    ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
-
-    if ((mv_row | mv_col) & 7)
-    {
-        x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
-    }
-    else
-    {
-        RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
-    }
-
-    mv_row = x->block[16].bmi.mv.as_mv.row;
-    mv_col = x->block[16].bmi.mv.as_mv.col;
-    pre_stride >>= 1;
-    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
-    uptr = x->pre.u_buffer + offset;
-    vptr = x->pre.v_buffer + offset;
-
-    if ((mv_row | mv_col) & 7)
-    {
-        x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride);
-        x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride);
-    }
-    else
-    {
-        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride);
-        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
-    }
-}
 
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index bdf49c9d0..a68e4aaba 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -13,7 +13,13 @@
 #define __INC_RECONINTER_H
 
 extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
-extern void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x);
+extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
+                                               unsigned char *dst_y,
+                                               unsigned char *dst_u,
+                                               unsigned char *dst_v,
+                                               int dst_ystride,
+                                               int dst_uvstride);
+
 extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
 
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 2c677cab3..a585f774c 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -119,7 +119,9 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
     }
     else
     {
-        vp8_build_inter16x16_predictors_mb_s(xd);
+        vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
+                                           xd->dst.u_buffer, xd->dst.v_buffer,
+                                           xd->dst.y_stride, xd->dst.uv_stride);
     }
 }
 
@@ -221,6 +223,9 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
                                           build_intra_predictors_mby)(xd);
         } else {
             vp8_intra_prediction_down_copy(xd);
+
+
+
         }
     }
     else
@@ -232,6 +237,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
     {
         BLOCKD *b = &xd->block[24];
+
         DEQUANT_INVOKE(&pbi->dequant, block)(b);
 
         /* do 2nd order transform on the dc block */
 
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 16afd591d..56275940e 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -118,14 +118,16 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
         xd->mode_info_context->mbmi.mb_skip_coeff = 1;
 
         /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
-        if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+        if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
         {
             vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
             vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
         }
         else
         {
-            vp8_build_inter16x16_predictors_mb_s(xd);
+            vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
+                                               xd->dst.u_buffer, xd->dst.v_buffer,
+                                               xd->dst.y_stride, xd->dst.uv_stride);
         }
         return;
     }
 
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 380e9933f..4a936ec4a 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1391,7 +1391,10 @@ int vp8cx_encode_inter_macroblock
 
         }
         else
-            vp8_build_inter16x16_predictors_mb_s(xd);
+            vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
+                                               xd->dst.u_buffer, xd->dst.v_buffer,
+                                               xd->dst.y_stride, xd->dst.uv_stride);
+
     }
 
     if (!x->skip)
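
Note: the consolidation works because the removed *_mb_s() ("skip")
variants differed from the predictor-buffer path only in where they
wrote: the frame's dst buffers instead of x->predictor. Making the
destination pointers and strides parameters lets one function serve
both callers. The two call patterns, sketched here for illustration
(assuming a MACROBLOCKD *xd already set up by the encoder or decoder;
this fragment is not part of the patch):

/* Normal path: predict into the macroblock's scratch buffer. Y fills
 * predictor[0..255] with stride 16; U and V live at offsets 256 and
 * 320 with stride 8. Reconstruction later adds the residual. */
vp8_build_inter16x16_predictors_mb(xd, xd->predictor,
                                   &xd->predictor[256], &xd->predictor[320],
                                   16, 8);

/* Skip path (mb_skip_coeff set, so no residual to add): predict
 * straight into the destination frame, saving the predictor-to-frame
 * copy that the old *_mb_s() functions existed to avoid. */
vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
                                   xd->dst.u_buffer, xd->dst.v_buffer,
                                   xd->dst.y_stride, xd->dst.uv_stride);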