From fcbff9ee04f5b67ce79fd329333c8b1970d9318d Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Thu, 2 Aug 2012 09:07:33 -0700 Subject: [PATCH] Replacing the 8x8 DCT with 8x8 ADST/DCT for I8x8 Fixed the code review comments. Under the htrans8x8 experiment the 8X8 DCT in the I8X8 mode is replaced with a combination of 8X8 ADST and DCT. Overall coding gains with the htrans8x8 experiment are: derf: 0.486 std-hd: 1.040 hd: 1.063 yt: 0.506 Note that part of the gain comes from bigger transforms (8x8 instead of 4x4) and part comes from replacing the DCT with the ADST. Change-Id: I92ca6bbfce11b4165d612b81d9adfad4d010c775 --- configure | 2 +- vp8/common/blockd.h | 71 ++++++++++++++- vp8/common/default_coef_probs.h | 2 +- vp8/common/entropy.h | 4 +- vp8/common/idctllm.c | 152 +++++++++++++++++++++++++++++++- vp8/decoder/decodframe.c | 37 ++------ vp8/decoder/dequantize.c | 45 ++++++++++ vp8/decoder/detokenize.c | 6 +- vp8/encoder/dct.c | 147 ++++++++++++++++++++++++++++++ vp8/encoder/encodeintra.c | 38 +++----- vp8/encoder/rdopt.c | 35 ++------ vp8/encoder/tokenize.c | 4 +- 12 files changed, 452 insertions(+), 91 deletions(-) diff --git a/configure b/configure index 75b93f4d1..fc998d05e 100755 --- a/configure +++ b/configure @@ -223,8 +223,8 @@ EXPERIMENT_LIST=" pred_filter lossless hybridtransform + hybridtransform8x8 switchable_interp - htrans8x8 tx16x16 " CONFIG_LIST=" diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 64fc06546..3c43a1e9a 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -54,7 +54,6 @@ typedef struct { #define PLANE_TYPE_UV 2 #define PLANE_TYPE_Y_WITH_DC 3 - typedef char ENTROPY_CONTEXT; typedef struct { ENTROPY_CONTEXT y1[4]; @@ -179,6 +178,50 @@ typedef enum { B_MODE_COUNT } B_PREDICTION_MODE; +#if CONFIG_HYBRIDTRANSFORM8X8 +// convert MB_PREDICTION_MODE to B_PREDICTION_MODE +static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) { + B_PREDICTION_MODE b_mode; + switch (mode) { + case DC_PRED: + b_mode = B_DC_PRED; + 
break; + case V_PRED: + b_mode = B_VE_PRED; + break; + case H_PRED: + b_mode = B_HE_PRED; + break; + case TM_PRED: + b_mode = B_TM_PRED; + break; + case D45_PRED: + b_mode = B_LD_PRED; + break; + case D135_PRED: + b_mode = B_RD_PRED; + break; + case D117_PRED: + b_mode = B_VR_PRED; + break; + case D153_PRED: + b_mode = B_HD_PRED; + break; + case D27_PRED: + b_mode = B_VL_PRED; + break; + case D63_PRED: + b_mode = B_HU_PRED; + break; + default : + // for debug purpose, to be removed after full testing + assert(0); + break; + } + return b_mode; +} +#endif + #define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */ #define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4) @@ -389,6 +432,32 @@ typedef struct MacroBlockD { } MACROBLOCKD; +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +// transform mapping +static void txfm_map(BLOCKD *b, B_PREDICTION_MODE bmode) { + switch (bmode) { + case B_TM_PRED : + case B_RD_PRED : + b->bmi.as_mode.tx_type = ADST_ADST; + break; + + case B_VE_PRED : + case B_VR_PRED : + b->bmi.as_mode.tx_type = ADST_DCT; + break; + + case B_HE_PRED : + case B_HD_PRED : + case B_HU_PRED : + b->bmi.as_mode.tx_type = DCT_ADST; + break; + + default : + b->bmi.as_mode.tx_type = DCT_DCT; + break; + } +} +#endif extern void vp8_build_block_doffsets(MACROBLOCKD *x); extern void vp8_setup_block_dptrs(MACROBLOCKD *x); diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h index dfb0e5ea7..940e971b7 100644 --- a/vp8/common/default_coef_probs.h +++ b/vp8/common/default_coef_probs.h @@ -434,7 +434,7 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128} } } -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 , { /* block Type 3 */ { /* Coeff Band 0 */ diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index 4497a3d47..190221c16 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -60,9 +60,9 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ /* Coefficients 
are predicted via a 3-dimensional probability table. */ /* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ - #define BLOCK_TYPES 4 -#if CONFIG_HTRANS8X8 + +#if CONFIG_HYBRIDTRANSFORM8X8 #define BLOCK_TYPES_8X8 4 #else #define BLOCK_TYPES_8X8 3 diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index e549fe098..616e4938e 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -35,6 +35,8 @@ static const int cospi8sqrt2minus1 = 20091; static const int sinpi8sqrt2 = 35468; static const int rounding = 0; +// TODO: these transforms can be further converted into integer forms +// for complexity optimization #if CONFIG_HYBRIDTRANSFORM float idct_4[16] = { 0.500000000000000, 0.653281482438188, 0.500000000000000, 0.270598050073099, @@ -51,11 +53,52 @@ float iadst_4[16] = { }; #endif +#if CONFIG_HYBRIDTRANSFORM8X8 +float idct_8[64] = { + 0.353553390593274, 0.490392640201615, 0.461939766255643, 0.415734806151273, + 0.353553390593274, 0.277785116509801, 0.191341716182545, 0.097545161008064, + 0.353553390593274, 0.415734806151273, 0.191341716182545, -0.097545161008064, + -0.353553390593274, -0.490392640201615, -0.461939766255643, -0.277785116509801, + 0.353553390593274, 0.277785116509801, -0.191341716182545, -0.490392640201615, + -0.353553390593274, 0.097545161008064, 0.461939766255643, 0.415734806151273, + 0.353553390593274, 0.097545161008064, -0.461939766255643, -0.277785116509801, + 0.353553390593274, 0.415734806151273, -0.191341716182545, -0.490392640201615, + 0.353553390593274, -0.097545161008064, -0.461939766255643, 0.277785116509801, + 0.353553390593274, -0.415734806151273, -0.191341716182545, 0.490392640201615, + 0.353553390593274, -0.277785116509801, -0.191341716182545, 0.490392640201615, + -0.353553390593274, -0.097545161008064, 0.461939766255643, -0.415734806151273, + 0.353553390593274, -0.415734806151273, 0.191341716182545, 0.097545161008064, + -0.353553390593274, 0.490392640201615, -0.461939766255643, 0.277785116509801, + 
0.353553390593274, -0.490392640201615, 0.461939766255643, -0.415734806151273, + 0.353553390593274, -0.277785116509801, 0.191341716182545, -0.097545161008064 +}; + +float iadst_8[64] = { + 0.089131608307533, 0.255357107325376, 0.387095214016349, 0.466553967085785, + 0.483002021635509, 0.434217976756762, 0.326790388032145, 0.175227946595735, + 0.175227946595735, 0.434217976756762, 0.466553967085785, 0.255357107325376, + -0.089131608307533, -0.387095214016348, -0.483002021635509, -0.326790388032145, + 0.255357107325376, 0.483002021635509, 0.175227946595735, -0.326790388032145, + -0.466553967085785, -0.089131608307533, 0.387095214016349, 0.434217976756762, + 0.326790388032145, 0.387095214016349, -0.255357107325376, -0.434217976756762, + 0.175227946595735, 0.466553967085786, -0.089131608307534, -0.483002021635509, + 0.387095214016349, 0.175227946595735, -0.483002021635509, 0.089131608307533, + 0.434217976756762, -0.326790388032145, -0.255357107325377, 0.466553967085785, + 0.434217976756762, -0.089131608307533, -0.326790388032145, 0.483002021635509, + -0.255357107325376, -0.175227946595735, 0.466553967085785, -0.387095214016348, + 0.466553967085785, -0.326790388032145, 0.089131608307533, 0.175227946595735, + -0.387095214016348, 0.483002021635509, -0.434217976756762, 0.255357107325376, + 0.483002021635509, -0.466553967085785, 0.434217976756762, -0.387095214016348, + 0.326790388032145, -0.255357107325375, 0.175227946595736, -0.089131608307532 +}; +#endif + #if CONFIG_HYBRIDTRANSFORM void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { int i, j, k; float bufa[16], bufb[16]; // buffers are for floating-point test purpose - // the implementation could be simplified in conjunction with integer transform + // the implementation could be simplified in + // conjunction with integer transform short *ip = input; short *op = output; int shortpitch = pitch >> 1; @@ -158,6 +201,113 @@ void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE 
tx_type) { } #endif +#if CONFIG_HYBRIDTRANSFORM8X8 +void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { + int i, j, k; + float bufa[64], bufb[64]; // buffers are for floating-point test purpose + // the implementation could be simplified in + // conjunction with integer transform + short *ip = input; + short *op = output; + int shortpitch = pitch >> 1; + + float *pfa = &bufa[0]; + float *pfb = &bufb[0]; + + // pointers to vertical and horizontal transforms + float *ptv, *pth; + + // load and convert residual array into floating-point + for(j = 0; j < 8; j++) { + for(i = 0; i < 8; i++) { + pfa[i] = (float)ip[i]; + } + pfa += 8; + ip += 8; + } + + // vertical transformation + pfa = &bufa[0]; + pfb = &bufb[0]; + + switch(tx_type) { + case ADST_ADST : + case ADST_DCT : + ptv = &iadst_8[0]; + break; + + default : + ptv = &idct_8[0]; + break; + } + + for(j = 0; j < 8; j++) { + for(i = 0; i < 8; i++) { + pfb[i] = 0 ; + for(k = 0; k < 8; k++) { + pfb[i] += ptv[k] * pfa[(k<<3)]; + } + pfa += 1; + } + + pfb += 8; + ptv += 8; + pfa = &bufa[0]; + } + + // horizontal transformation + pfa = &bufa[0]; + pfb = &bufb[0]; + + switch(tx_type) { + case ADST_ADST : + case DCT_ADST : + pth = &iadst_8[0]; + break; + + default : + pth = &idct_8[0]; + break; + } + + for(j = 0; j < 8; j++) { + for(i = 0; i < 8; i++) { + pfa[i] = 0; + for(k = 0; k < 8; k++) { + pfa[i] += pfb[k] * pth[k]; + } + pth += 8; + } + + pfa += 8; + pfb += 8; + + switch(tx_type) { + case ADST_ADST : + case DCT_ADST : + pth = &iadst_8[0]; + break; + + default : + pth = &idct_8[0]; + break; + } + } + + // convert to short integer format and load BLOCKD buffer + op = output; + pfa = &bufa[0]; + + for(j = 0; j < 8; j++) { + for(i = 0; i < 8; i++) { + op[i] = (pfa[i] > 0 ) ? 
(short)( pfa[i] / 8 + 0.49) : + -(short)( - pfa[i] / 8 + 0.49); + } + op += shortpitch; + pfa += 8; + } +} +#endif void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) { int i; diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index d50e1dfb3..0588d002b 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -46,7 +46,6 @@ int dec_debug = 0; #define COEFCOUNT_TESTING - static int merge_index(int v, int n, int modulus) { int max1 = (n - 1 - modulus / 2) / modulus + 1; if (v < max1) v = v * modulus + modulus / 2; @@ -260,7 +259,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } } -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { xd->mode_info_context->mbmi.txfm_size = TX_8X8; } @@ -336,29 +335,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, for (i = 0; i < 16; i++) { BLOCKD *b = &xd->block[i]; int b_mode = xd->mode_info_context->bmi[i].as_mode.first; - if(active_ht) { - switch(b_mode) { - case B_TM_PRED : - case B_RD_PRED : - b->bmi.as_mode.tx_type = ADST_ADST; - break; - - case B_VE_PRED : - case B_VR_PRED : - b->bmi.as_mode.tx_type = ADST_DCT; - break ; - - case B_HE_PRED : - case B_HD_PRED : - case B_HU_PRED : - b->bmi.as_mode.tx_type = DCT_ADST; - break; - - default : - b->bmi.as_mode.tx_type = DCT_DCT; - break; - } - } + if(active_ht) + txfm_map(b, b_mode); } // loop over 4x4 blocks } #endif @@ -392,7 +370,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int i8x8mode; BLOCKD *b; -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 int idx = (ib & 0x02) ? 
(ib + 2) : ib; short *q = xd->block[idx].qcoeff; @@ -410,8 +388,11 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, RECON_INVOKE(RTCD_VTABLE(recon), intra8x8_predict) (b, i8x8mode, b->predictor); -#if CONFIG_HTRANS8X8 - vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); +#if CONFIG_HYBRIDTRANSFORM8X8 + txfm_map(b, pred_mode_conv(i8x8mode)); + vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type, + q, dq, pre, dst, 16, stride); + // vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); q += 64; #else for (j = 0; j < 4; j++) { diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c index 655409176..bf44fd61a 100644 --- a/vp8/decoder/dequantize.c +++ b/vp8/decoder/dequantize.c @@ -79,6 +79,51 @@ void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq, } #endif +#if CONFIG_HYBRIDTRANSFORM8X8 +void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, + unsigned char *pred, unsigned char *dest, + int pitch, int stride) { + short output[64]; + short *diff_ptr = output; + int b, r, c; + int i; + unsigned char *origdest = dest; + unsigned char *origpred = pred; + + input[0] = dq[0] * input[0]; + for (i = 1; i < 64; i++) { + input[i] = dq[1] * input[i]; + } + + vp8_iht8x8llm_c(input, output, 16, tx_type); + + vpx_memset(input, 0, 128); + + for (b = 0; b < 4; b++) { + for (r = 0; r < 4; r++) { + for (c = 0; c < 4; c++) { + int a = diff_ptr[c] + pred[c]; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dest[c] = (unsigned char) a; + } + + dest += stride; + diff_ptr += 8; + pred += pitch; + } + diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4; + dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4; + pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4; + } +} +#endif + void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride) { short output[16]; diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index 
c93b8e9c5..5f9768d41 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -473,7 +473,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) { const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB); INT16 *qcoeff_ptr = &xd->qcoeff[0]; -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED) ? 16 : 24; if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && @@ -506,7 +506,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) { else seg_eob = 64; -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 for (i = 0; i < bufthred ; i += 4) { #else for (i = 0; i < 24; i += 4) { @@ -528,7 +528,7 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) { qcoeff_ptr += 64; } -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { type = PLANE_TYPE_UV; seg_eob = 16; diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index ba2a692d1..ad5258552 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -17,6 +17,8 @@ #include "vp8/common/blockd.h" +// TODO: these transforms can be converted into integer forms to reduce +// the complexity float dct_4[16] = { 0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000, 0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188, @@ -32,6 +34,45 @@ float adst_4[16] = { }; #endif +#if CONFIG_HYBRIDTRANSFORM8X8 +float dct_8[64] = { + 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274, + 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274, + 0.490392640201615, 0.415734806151273, 0.277785116509801, 0.097545161008064, + -0.097545161008064, -0.277785116509801, -0.415734806151273, -0.490392640201615, + 0.461939766255643, 0.191341716182545, -0.191341716182545, -0.461939766255643, + -0.461939766255643, -0.191341716182545, 0.191341716182545, 
0.461939766255643, + 0.415734806151273, -0.097545161008064, -0.490392640201615, -0.277785116509801, + 0.277785116509801, 0.490392640201615, 0.097545161008064, -0.415734806151273, + 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274, + 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274, + 0.277785116509801, -0.490392640201615, 0.097545161008064, 0.415734806151273, + -0.415734806151273, -0.097545161008064, 0.490392640201615, -0.277785116509801, + 0.191341716182545, -0.461939766255643, 0.461939766255643, -0.191341716182545, + -0.191341716182545, 0.461939766255643, -0.461939766255643, 0.191341716182545, + 0.097545161008064, -0.277785116509801, 0.415734806151273, -0.490392640201615, + 0.490392640201615, -0.415734806151273, 0.277785116509801, -0.097545161008064 +}; + +float adst_8[64] = { + 0.089131608307533, 0.175227946595735, 0.255357107325376, 0.326790388032145, + 0.387095214016349, 0.434217976756762, 0.466553967085785, 0.483002021635509, + 0.255357107325376, 0.434217976756762, 0.483002021635509, 0.387095214016349, + 0.175227946595735, -0.089131608307533, -0.326790388032145, -0.466553967085785, + 0.387095214016349, 0.466553967085785, 0.175227946595735, -0.255357107325376, + -0.483002021635509, -0.326790388032145, 0.089131608307533, 0.434217976756762, + 0.466553967085785, 0.255357107325376, -0.326790388032145, -0.434217976756762, + 0.089131608307533, 0.483002021635509, 0.175227946595735, -0.387095214016348, + 0.483002021635509, -0.089131608307533, -0.466553967085785, 0.175227946595735, + 0.434217976756762, -0.255357107325376, -0.387095214016348, 0.326790388032145, + 0.434217976756762, -0.387095214016348, -0.089131608307533, 0.466553967085786, + -0.326790388032145, -0.175227946595735, 0.483002021635509, -0.255357107325375, + 0.326790388032145, -0.483002021635509, 0.387095214016349, -0.089131608307534, + -0.255357107325377, 0.466553967085785, -0.434217976756762, 0.175227946595736, + 0.175227946595735, 
-0.326790388032145, 0.434217976756762, -0.483002021635509, + 0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532 +}; +#endif static const int xC1S7 = 16069; static const int xC2S6 = 15137; @@ -394,6 +435,112 @@ void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) { } #endif +#if CONFIG_HYBRIDTRANSFORM8X8 +void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { + int i, j, k; + float bufa[64], bufb[64]; // buffers are for floating-point test purpose + // the implementation could be simplified in + // conjunction with integer transform + short *ip = input; + short *op = output; + + float *pfa = &bufa[0]; + float *pfb = &bufb[0]; + + // pointers to vertical and horizontal transforms + float *ptv, *pth; + + // load and convert residual array into floating-point + for(j = 0; j < 8; j++) { + for(i = 0; i < 8; i++) { + pfa[i] = (float)ip[i]; + } + pfa += 8; + ip += pitch / 2; + } + + // vertical transformation + pfa = &bufa[0]; + pfb = &bufb[0]; + + switch(tx_type) { + case ADST_ADST : + case ADST_DCT : + ptv = &adst_8[0]; + break; + + default : + ptv = &dct_8[0]; + break; + } + + for(j = 0; j < 8; j++) { + for(i = 0; i < 8; i++) { + pfb[i] = 0; + for(k = 0; k < 8; k++) { + pfb[i] += ptv[k] * pfa[(k<<3)]; + } + pfa += 1; + } + pfb += 8; + ptv += 8; + pfa = &bufa[0]; + } + + // horizontal transformation + pfa = &bufa[0]; + pfb = &bufb[0]; + + switch(tx_type) { + case ADST_ADST : + case DCT_ADST : + pth = &adst_8[0]; + break; + + default : + pth = &dct_8[0]; + break; + } + + for(j = 0; j < 8; j++) { + for(i = 0; i < 8; i++) { + pfa[i] = 0; + for(k = 0; k < 8; k++) { + pfa[i] += pfb[k] * pth[k]; + } + pth += 8; + } + + pfa += 8; + pfb += 8; + + switch(tx_type) { + case ADST_ADST : + case DCT_ADST : + pth = &adst_8[0]; + break; + + default : + pth = &dct_8[0]; + break; + } + } + + // convert to short integer format and load BLOCKD buffer + op = output ; + pfa = &bufa[0] ; + + for(j = 0; j < 8; j++) { + for(i 
= 0; i < 8; i++) { + op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) : + -(short)(- 8 * pfa[i] + 0.49); + } + op += 8; + pfa += 8; + } +} +#endif + void vp8_short_fdct4x4_c(short *input, short *output, int pitch) { int i; int a1, b1, c1, d1; diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 325efeb6b..964046d92 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -90,28 +90,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, #if CONFIG_HYBRIDTRANSFORM if(active_ht) { b->bmi.as_mode.test = b->bmi.as_mode.first; - switch(b->bmi.as_mode.first) { - // case B_DC_PRED : - case B_TM_PRED : - case B_RD_PRED : - b->bmi.as_mode.tx_type = ADST_ADST; - break; - - case B_VE_PRED : - case B_VR_PRED : - b->bmi.as_mode.tx_type = ADST_DCT; - break; - - case B_HE_PRED : - case B_HD_PRED : - case B_HU_PRED : - b->bmi.as_mode.tx_type = DCT_ADST; - break; - - default : - b->bmi.as_mode.tx_type = DCT_DCT; - break; - } + txfm_map(b, b->bmi.as_mode.first); vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type); vp8_ht_quantize_b(be, b); @@ -329,16 +308,25 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, } #endif -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 { MACROBLOCKD *xd = &x->e_mbd; int idx = (ib & 0x02) ? 
(ib + 2) : ib; // generate residual blocks vp8_subtract_4b_c(be, b, 16); - x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + + txfm_map(b, pred_mode_conv(b->bmi.as_mode.first)); + + vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, + b->bmi.as_mode.tx_type); x->quantize_b_8x8(x->block + idx, xd->block + idx); - vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); + vp8_iht8x8llm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32, + b->bmi.as_mode.tx_type); + +// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); +// x->quantize_b_8x8(x->block + idx, xd->block + idx); +// vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); // reconstruct submacroblock for (i = 0; i < 4; i++) { diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index a2cd2651a..6eb10f4f1 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -454,7 +454,7 @@ int vp8_block_error_c(short *coeff, short *dqcoeff) { return error; } -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 int vp8_submb_error_c(short *coeff, short *dqcoeff) { int i; int error = 0; @@ -985,28 +985,7 @@ static int64_t rd_pick_intra4x4block( #if CONFIG_HYBRIDTRANSFORM if(active_ht) { b->bmi.as_mode.test = mode; - switch(mode) { - // case B_DC_PRED : - case B_TM_PRED : - case B_RD_PRED : - b->bmi.as_mode.tx_type = ADST_ADST; - break; - - case B_VE_PRED : - case B_VR_PRED : - b->bmi.as_mode.tx_type = ADST_DCT; - break; - - case B_HE_PRED : - case B_HD_PRED : - case B_HU_PRED : - b->bmi.as_mode.tx_type = DCT_ADST; - break; - - default : - b->bmi.as_mode.tx_type = DCT_DCT; - break; - } + txfm_map(b, mode); vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type); vp8_ht_quantize_b(be, b); @@ -1267,7 +1246,7 @@ static int64_t rd_pick_intra8x8block( DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 8); DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16 * 4); -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 // perform transformation of 
dimension 8x8 // note the input and output index mapping int idx = (ib & 0x02) ? (ib + 2) : ib; @@ -1298,8 +1277,10 @@ static int64_t rd_pick_intra8x8block( vp8_subtract_4b_c(be, b, 16); -#if CONFIG_HTRANS8X8 - x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); +#if CONFIG_HYBRIDTRANSFORM8X8 + txfm_map(b, pred_mode_conv(mode)); + vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, b->bmi.as_mode.tx_type); +// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); // compute quantization mse of 8x8 block @@ -1376,7 +1357,7 @@ static int64_t rd_pick_intra8x8block( #endif vp8_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib); -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 *(a + vp8_block2above_8x8[idx]) = besta0; *(a + vp8_block2above_8x8[idx] + 1) = besta1; *(l + vp8_block2left_8x8 [idx]) = bestl0; diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index dac18c6db..105aa6a7c 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -504,7 +504,7 @@ static void tokenize1st_order_ht( MACROBLOCKD *xd, #endif -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 static void tokenize1st_order_chroma ( MACROBLOCKD *xd, @@ -886,7 +886,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { tokenize1st_order_ht(x, t, plane_type, cpi); } else { -#if CONFIG_HTRANS8X8 +#if CONFIG_HYBRIDTRANSFORM8X8 if (x->mode_info_context->mbmi.mode == I8X8_PRED) { ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;