Merge pull request #1908 from HaiboZhu/Add_Highprofile

Add high profile support
This commit is contained in:
huili2 2015-04-21 09:42:49 +08:00
commit dac13639c9
25 changed files with 1801 additions and 263 deletions

View File

@ -65,9 +65,12 @@ extern const uint8_t g_kuiMbCountScan4Idx[24];
// Scan index into the 30-element mv / uiRefIndex cache, with a 4x4 block as the basic unit.
extern const uint8_t g_kuiCache30ScanIdx[16];
// 24-entry scan index table (NOTE(review): presumably indexes the 48-element non-zero-count cache — confirm at the definition).
extern const uint8_t g_kuiCache48CountScan4Idx[24];
// Six 8x8 factor matrices; the definition states they are generated from equations 8-317 and 8-318.
extern const uint8_t g_kuiMatrixV[6][8][8];
// NOTE(review): presumably the default 4x4 / 8x8 scaling lists; the definitions are not visible here — confirm.
extern const uint8_t g_kuiDequantScaling4x4Default[2][16];
extern const uint8_t g_kuiDequantScaling8x8Default[2][64];
// Dequantisation tables with one row per QP (52 rows): 8 entries per row for the first, 64 per row for the 8x8 table.
// Each table is declared exactly once.
extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16);
extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff8x8[52][64], 16);
// 52-entry table (NOTE(review): presumably maps a QP index to the chroma QP — confirm).
extern const uint8_t g_kuiChromaQpTable[52];
// 64 x 4 table (NOTE(review): presumably the CABAC LPS range table — confirm).
extern const uint8_t g_kuiCabacRangeLps[64][4];
@ -279,6 +282,8 @@ typedef struct TagNalUnitHeaderExt {
// Union masks: every intra macroblock type, and every inter macroblock type.
#define MB_TYPE_INTRA (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA8x8 | MB_TYPE_INTRA_PCM)
#define MB_TYPE_INTER (MB_TYPE_16x16 | MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_8x8_REF0 | MB_TYPE_SKIP)
// Exact-match predicates: true only when `type` equals the named macroblock type.
#define IS_INTRA4x4(type) ( (type) == MB_TYPE_INTRA4x4 )
#define IS_INTRA8x8(type) ( (type) == MB_TYPE_INTRA8x8 )
// True for either of the two NxN intra types (4x4 or 8x8). Evaluates `type` twice.
#define IS_INTRANxN(type) ( IS_INTRA4x4(type) || IS_INTRA8x8(type) )
#define IS_INTRA16x16(type) ( (type) == MB_TYPE_INTRA16x16 )
// Mask predicates: non-zero when `type` shares any bit with the corresponding union mask.
#define IS_INTRA(type) ( (type) & MB_TYPE_INTRA )
#define IS_INTER(type) ( (type) & MB_TYPE_INTER )
@ -304,6 +309,7 @@ typedef struct TagNalUnitHeaderExt {
#define I16_PRED_DC_128 6
#define I16_PRED_DC_A 7
// Intra 4x4 luma prediction modes.
// The intra 8x8 luma path reuses these same mode definitions.
// NOTE(review): I4_PRED_INVALID and I4_PRED_V are both 0, so the two cannot be told apart by value — confirm this is intended.
#define I4_PRED_INVALID 0
#define I4_PRED_V 0
#define I4_PRED_H 1

View File

@ -37,12 +37,12 @@ namespace WelsCommon {
// Scan-index mapping for pNonZeroCount[16+8].
// Indices 0..15 (luma) and 16..23 (chroma) form a raster with four blocks per row;
// each group of four entries below covers one 2x2 tile of that raster.
// The table holds exactly 24 entries and every index 0..23 appears once.
const uint8_t g_kuiMbCountScan4Idx[24] = {
  0, 1, 4, 5,      // luma: rows 0-1, columns 0-1
  2, 3, 6, 7,      // luma: rows 0-1, columns 2-3
  8, 9, 12, 13,    // luma: rows 2-3, columns 0-1
  10, 11, 14, 15,  // luma: rows 2-3, columns 2-3
  16, 17, 20, 21,  // chroma: columns 0-1 of the 16..23 rows
  18, 19, 22, 23   // chroma: columns 2-3 of the 16..23 rows
};
@ -61,6 +61,68 @@ const uint8_t g_kuiCache48CountScan4Idx[24] = {
46, 47, // 6+5*8, 7+5*8,
};
// Six 8x8 factor matrices, generated from equation 8-317, 8-318.
// Indexed as [set][row][column]; inside every set the top-left 4x4 tile
// repeats across the whole 8x8 matrix (rows 4-7 mirror rows 0-3, columns 4-7 mirror columns 0-3).
const uint8_t g_kuiMatrixV[6][8][8] = {
  { /* set 0 */
    { 20, 19, 25, 19, 20, 19, 25, 19 },
    { 19, 18, 24, 18, 19, 18, 24, 18 },
    { 25, 24, 32, 24, 25, 24, 32, 24 },
    { 19, 18, 24, 18, 19, 18, 24, 18 },
    { 20, 19, 25, 19, 20, 19, 25, 19 },
    { 19, 18, 24, 18, 19, 18, 24, 18 },
    { 25, 24, 32, 24, 25, 24, 32, 24 },
    { 19, 18, 24, 18, 19, 18, 24, 18 }
  },
  { /* set 1 */
    { 22, 21, 28, 21, 22, 21, 28, 21 },
    { 21, 19, 26, 19, 21, 19, 26, 19 },
    { 28, 26, 35, 26, 28, 26, 35, 26 },
    { 21, 19, 26, 19, 21, 19, 26, 19 },
    { 22, 21, 28, 21, 22, 21, 28, 21 },
    { 21, 19, 26, 19, 21, 19, 26, 19 },
    { 28, 26, 35, 26, 28, 26, 35, 26 },
    { 21, 19, 26, 19, 21, 19, 26, 19 }
  },
  { /* set 2 */
    { 26, 24, 33, 24, 26, 24, 33, 24 },
    { 24, 23, 31, 23, 24, 23, 31, 23 },
    { 33, 31, 42, 31, 33, 31, 42, 31 },
    { 24, 23, 31, 23, 24, 23, 31, 23 },
    { 26, 24, 33, 24, 26, 24, 33, 24 },
    { 24, 23, 31, 23, 24, 23, 31, 23 },
    { 33, 31, 42, 31, 33, 31, 42, 31 },
    { 24, 23, 31, 23, 24, 23, 31, 23 }
  },
  { /* set 3 */
    { 28, 26, 35, 26, 28, 26, 35, 26 },
    { 26, 25, 33, 25, 26, 25, 33, 25 },
    { 35, 33, 45, 33, 35, 33, 45, 33 },
    { 26, 25, 33, 25, 26, 25, 33, 25 },
    { 28, 26, 35, 26, 28, 26, 35, 26 },
    { 26, 25, 33, 25, 26, 25, 33, 25 },
    { 35, 33, 45, 33, 35, 33, 45, 33 },
    { 26, 25, 33, 25, 26, 25, 33, 25 }
  },
  { /* set 4 */
    { 32, 30, 40, 30, 32, 30, 40, 30 },
    { 30, 28, 38, 28, 30, 28, 38, 28 },
    { 40, 38, 51, 38, 40, 38, 51, 38 },
    { 30, 28, 38, 28, 30, 28, 38, 28 },
    { 32, 30, 40, 30, 32, 30, 40, 30 },
    { 30, 28, 38, 28, 30, 28, 38, 28 },
    { 40, 38, 51, 38, 40, 38, 51, 38 },
    { 30, 28, 38, 28, 30, 28, 38, 28 }
  },
  { /* set 5 */
    { 36, 34, 46, 34, 36, 34, 46, 34 },
    { 34, 32, 43, 32, 34, 32, 43, 32 },
    { 46, 43, 58, 43, 46, 43, 58, 43 },
    { 34, 32, 43, 32, 34, 32, 43, 32 },
    { 36, 34, 46, 34, 36, 34, 46, 34 },
    { 34, 32, 43, 32, 34, 32, 43, 32 },
    { 46, 43, 58, 43, 46, 43, 58, 43 },
    { 34, 32, 43, 32, 34, 32, 43, 32 }
  }
};
//cache element equal to 30
const uint8_t g_kuiCache30ScanIdx[16] = { //mv or uiRefIndex cache scan index, 4*4 block as basic unit
@ -172,6 +234,113 @@ ALIGNED_DECLARE (const uint16_t, g_kuiDequantCoeff[52][8], 16) = {
/*50*/{ 3328, 4096, 3328, 4096, 4096, 5120, 4096, 5120 }, /*51*/{ 3584, 4608, 3584, 4608, 4608, 5888, 4608, 5888 },
};
ALIGNED_DECLARE (const uint16_t, g_kuiDequantCoeff8x8[52][64], 16) = {
/* QP == 0 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 1 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 2 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 3 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 4 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 5 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 6 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 7 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 8 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 9 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 10 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 11 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 12 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 13 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 14 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 15 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 16 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 17 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 18 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 19 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 20 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 21 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 22 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 23 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 24 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 25 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 26 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 27 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 28 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 29 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 30 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 31 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 32 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 33 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 34 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 35 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 36 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 37 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 38 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 39 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 40 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 41 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 42 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 43 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 44 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 45 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 46 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 47 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 48 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 49 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 50 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 51 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
};
// table A-1 - Level limits
const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER] = {
{LEVEL_1_0, 1485, 99, 396, 64, 175, -256, 255, 2, 0x7fff}, /* level 1 */

View File

@ -68,10 +68,12 @@ struct TagDqLayer {
int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
int16_t (*pMvd[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM];
bool* pNoSubMbPartSizeLessThan8x8Flag;
bool* pTransformSize8x8Flag;
int8_t* pLumaQp;
int8_t (*pChromaQp)[2];
int8_t* pCbp;
uint8_t *pCbfDc;
uint16_t *pCbfDc;
int8_t (*pNzc)[24];
int8_t (*pNzcRs)[24];
int8_t* pResidualPredFlag;
@ -81,6 +83,7 @@ struct TagDqLayer {
int16_t (*pScaledTCoeff)[MB_COEFF_LIST_SIZE];
int8_t (*pIntraPredMode)[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
int8_t (*pIntra4x4FinalMode)[MB_BLOCK4x4_NUM];
uint8_t *pIntraNxNAvailFlag;
int8_t* pChromaPredMode;
//uint8_t (*motion_pred_flag[LIST_A])[MB_PARTITION_SIZE]; // 8x8
int8_t (*pSubMbType)[MB_SUB_PARTITION_SIZE];
@ -132,7 +135,6 @@ typedef struct TagGpuAvcLayer {
int8_t* pCbp;
int8_t (*pNzc)[24];
int8_t (*pIntraPredMode)[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
int32_t iMbX;
int32_t iMbY;
int32_t iMbXyIndex;

View File

@ -232,6 +232,18 @@ static inline int32_t BsGetTrailingBits (uint8_t* pBuf) {
return 0;
}
/*
 * Tell the caller whether more RBSP data is left for processing.
 *
 * Computes iBits - ((pCurBuf - pStartBuf - 2) << 3) - iLeftBits from the
 * bit-string state and returns true when that value is greater than 1,
 * false otherwise.
 */
static inline bool CheckMoreRBSPData (PBitStringAux pBsAux) {
  return (pBsAux->iBits - ((pBsAux->pCurBuf - pBsAux->pStartBuf - 2) << 3) - pBsAux->iLeftBits) > 1;
}
//define macros to check syntax elements
#define WELS_CHECK_SE_BOTH_ERROR(val, lower_bound, upper_bound, syntax_name, ret_code) do {\
if ((val < lower_bound) || (val > upper_bound)) {\

View File

@ -39,6 +39,7 @@
namespace WelsDec {
// Plain C (suffix _c) inverse-transform helpers; both share the same parameter list.
// NOTE(review): presumably each adds the inverse-transformed residual in pRs to the prediction
// samples at pPred (row pitch kiStride) — confirm against the implementations.
void IdctResAddPred_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
// 8x8 counterpart of IdctResAddPred_c.
void IdctResAddPred8x8_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
#if defined(__cplusplus)
extern "C" {

View File

@ -91,8 +91,15 @@ typedef struct {
#define NEW_CTX_OFFSET_LAST 166
#define NEW_CTX_OFFSET_ONE 227
#define NEW_CTX_OFFSET_ABS 232
// Same value as NEW_CTX_OFFSET_TRANSFORM_SIZE_8X8_FLAG below.
#define NEW_CTX_OFFSET_TS_8x8_FLAG 399
#define CTX_NUM_MVD 7
#define CTX_NUM_CBP 4
// Context offsets used for the 8x8 transform, taken from Table 9-34 in Page 270.
// NOTE(review): presumably Table 9-34 of ITU-T H.264; page numbers differ between editions.
#define NEW_CTX_OFFSET_TRANSFORM_SIZE_8X8_FLAG 399
#define NEW_CTX_OFFSET_MAP_8x8 402
#define NEW_CTX_OFFSET_LAST_8x8 417
#define NEW_CTX_OFFSET_ONE_8x8 426
#define NEW_CTX_OFFSET_ABS_8x8 431 // = NEW_CTX_OFFSET_ONE_8x8 + 5, the same spacing as NEW_CTX_OFFSET_ABS (232) after NEW_CTX_OFFSET_ONE (227). NOTE(review): the original author could not locate a separate table entry for this value — confirm against Table 9-34.
typedef struct TagDataBuffer {
uint8_t* pHead;
@ -131,6 +138,8 @@ typedef void (*PIdctResAddPredFunc) (uint8_t* pPred, const int32_t kiStride, int
// Function-pointer type for picture expansion routines.
// NOTE(review): presumably pads the borders of a kiPicWidth x kiPicHeight plane at pDst with row pitch kiStride — confirm.
typedef void (*PExpandPictureFunc) (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicWidth,
const int32_t kiPicHeight);
// Function-pointer type matching the parameter list of the WelsI8x8LumaPred*_c predictors.
// NOTE(review): bTLAvail / bTRAvail presumably flag whether the top-left / top-right neighbours are available — confirm.
typedef void (*PGetIntraPred8x8Func) (uint8_t* pPred, const int32_t kiLumaStride, bool bTLAvail, bool bTRAvail);
/**/
typedef struct TagRefPic {
PPicture pRefList[LIST_A][MAX_REF_PIC_COUNT]; // reference picture marking plus FIFO scheme
@ -262,15 +271,18 @@ typedef struct TagWelsDecoderContext {
int16_t* pMbType[LAYER_NUM_EXCHANGEABLE]; /* mb type */
int16_t (*pMv[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE MB_BLOCK4x4_NUM*]
int8_t (*pRefIndex[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM];
bool* pNoSubMbPartSizeLessThan8x8Flag[LAYER_NUM_EXCHANGEABLE];
bool* pTransformSize8x8Flag[LAYER_NUM_EXCHANGEABLE];
int8_t* pLumaQp[LAYER_NUM_EXCHANGEABLE]; /*mb luma_qp*/
int8_t (*pChromaQp[LAYER_NUM_EXCHANGEABLE])[2]; /*mb chroma_qp*/
int16_t (*pMvd[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE MB_BLOCK4x4_NUM*]
uint8_t* pCbfDc[LAYER_NUM_EXCHANGEABLE];
uint16_t* pCbfDc[LAYER_NUM_EXCHANGEABLE];
int8_t (*pNzc[LAYER_NUM_EXCHANGEABLE])[24];
int8_t (*pNzcRs[LAYER_NUM_EXCHANGEABLE])[24];
int16_t (*pScaledTCoeff[LAYER_NUM_EXCHANGEABLE])[MB_COEFF_LIST_SIZE]; /*need be aligned*/
int8_t (*pIntraPredMode[LAYER_NUM_EXCHANGEABLE])[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
int8_t (*pIntra4x4FinalMode[LAYER_NUM_EXCHANGEABLE])[MB_BLOCK4x4_NUM];
uint8_t* pIntraNxNAvailFlag[LAYER_NUM_EXCHANGEABLE];
int8_t* pChromaPredMode[LAYER_NUM_EXCHANGEABLE];
int8_t* pCbp[LAYER_NUM_EXCHANGEABLE];
uint8_t (*pMotionPredFlag[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_PARTITION_SIZE]; // 8x8
@ -284,7 +296,6 @@ typedef struct TagWelsDecoderContext {
uint32_t iMbHeight;
} sMb;
// reconstruction picture
PPicture pDec; //pointer to current picture being reconstructed
@ -381,6 +392,9 @@ typedef struct TagWelsDecoderContext {
PGetIntraPredFunc pGetIChromaPredFunc[7]; // h264_predict_8x8_t
PIdctResAddPredFunc pIdctResAddPredFunc;
SMcFunc sMcFunc;
//Transform8x8
PGetIntraPred8x8Func pGetI8x8LumaPredFunc[14];
PIdctResAddPredFunc pIdctResAddPredFunc8x8;
//For error concealment
SCopyFunc sCopyFunc;
@ -395,8 +409,8 @@ typedef struct TagWelsDecoderContext {
int32_t iCurSeqIntervalMaxPicWidth;
int32_t iCurSeqIntervalMaxPicHeight;
PWelsFillNeighborMbInfoIntra4x4Func pFillInfoCacheIntra4x4Func;
PWelsMapNeighToSample pMap4x4NeighToSampleFunc;
PWelsFillNeighborMbInfoIntra4x4Func pFillInfoCacheIntraNxNFunc;
PWelsMapNeighToSample pMapNxNNeighToSampleFunc;
PWelsMap16NeighToSample pMap16x16NeighToSampleFunc;
//feedback whether or not have VCL in current AU, and the temporal ID

View File

// Intra 4x4 luma predictors (plain C). All take the destination pointer and its stride.
void WelsI4x4LumaPredVR_c (uint8_t* pPred, const int32_t kiStride);
void WelsI4x4LumaPredHU_c (uint8_t* pPred, const int32_t kiStride);
void WelsI4x4LumaPredHD_c (uint8_t* pPred, const int32_t kiStride);
// Intra 8x8 luma predictors (plain C): 14 functions sharing one parameter list, which matches
// the PGetIntraPred8x8Func pointer type.
// NOTE(review): bTLAvail / bTRAvail presumably report top-left / top-right neighbour availability;
// the DcLeft / DcTop / DcNA and DDLTop / VLTop variants look like fallbacks for missing neighbours — confirm.
void WelsI8x8LumaPredV_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredH_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredDc_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredDDL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredDDLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredDDR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredVL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredVLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredVR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredHU_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
void WelsI8x8LumaPredHD_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
// Intra chroma predictors (plain C).
void WelsIChromaPredV_c (uint8_t* pPred, const int32_t kiStride);
void WelsIChromaPredH_c (uint8_t* pPred, const int32_t kiStride);
void WelsIChromaPredPlane_c (uint8_t* pPred, const int32_t kiStride);
@ -96,8 +111,6 @@ void WelsDecoderIChromaPredV_mmx (uint8_t* pPred, const int32_t kiStride);
// Predictor variants with _mmx / _sse2 suffixes; they take the same (pPred, kiStride) parameters as the _c versions.
// NOTE(review): presumably x86 SIMD implementations selected at run time — confirm where they are installed.
void WelsDecoderIChromaPredDcLeft_mmx (uint8_t* pPred, const int32_t kiStride);
void WelsDecoderIChromaPredDcNA_mmx (uint8_t* pPred, const int32_t kiStride);
void WelsDecoderI4x4LumaPredH_sse2 (uint8_t* pPred, const int32_t kiStride);
void WelsDecoderI4x4LumaPredDDR_mmx (uint8_t* pPred, const int32_t kiStride);
void WelsDecoderI4x4LumaPredHD_mmx (uint8_t* pPred, const int32_t kiStride);

View File

@ -167,12 +167,16 @@ typedef struct TagPps {
bool bRedundantPicCntPresentFlag;
bool bWeightedPredFlag;
uint8_t uiWeightedBipredIdc;
bool bTransform_8x8_mode_flag;
bool bTransform8x8ModeFlag;
//Add for scalinglist support
bool bPicScalingMatrixPresentFlag;
bool bPicScalingListPresentFlag[12];
uint8_t iScalingList4x4[6][16];
uint8_t iScalingList8x8[6][64];
int32_t iSecondChromaQPIndexOffset; //second_chroma_qp_index_offset
} SPps, *PPps;
} // namespace WelsDec

View File

@ -46,6 +46,7 @@ int32_t ParseEndOfSliceCabac (PWelsDecoderContext pCtx, uint32_t& uiBinVal);
// CABAC syntax-element parsers. Each returns an int32_t status and takes a trailing non-const
// reference parameter.
// NOTE(review): presumably the reference receives the decoded value and 0 means success — confirm in the definitions.
int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip);
int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
// Parser for the transform-size-8x8 flag; the result type is bool rather than uint32_t.
int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, bool& bTransformSize8x8Flag);
int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType);
int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal);
int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal);
@ -66,6 +67,9 @@ int32_t ParseSignificantCoeffCabac (int32_t* significant, int32_t iResProperty,
// Residual block parser.
// NOTE(review): presumably decodes up to iMaxNumCoeff coefficients into sTCoeff following pScanTable — confirm in the definition.
int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp,
PWelsDecoderContext pCtx);
// 8x8 variant; its parameter list is identical to ParseResidualBlockCabac.
int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp,
PWelsDecoderContext pCtx);
// NOTE(review): presumably parses I_PCM macroblock data — confirm in the definition.
int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx);
}
//#pragma pack()

View File

@ -53,9 +53,9 @@ namespace WelsDec {
void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer);
void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurLayer);
void WelsFillCacheConstrain0Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer);
void WelsFillCacheConstrain1Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer);
void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer);
@ -64,29 +64,29 @@ void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
/*!
* \brief check iPredMode for intra16x16 eligible or not
* \param input : current iPredMode
* \param output: 0 indicating decoding correctly; -1 means error occurrence
* \param input : current iPredMode
* \param output: 0 indicating decoding correctly; -1 means error occurrence
*/
int32_t CheckIntra16x16PredMode (uint8_t uiSampleAvail, int8_t* pMode);
/*!
* \brief check iPredMode for intra4x4 eligible or not
* \param input : current iPredMode
* \param output: 0 indicating decoding correctly; -1 means error occurrence
* \brief check iPredMode for intraNxN eligible or not
* \param input : current iPredMode
* \param output: 0 indicating decoding correctly; -1 means error occurrence
*/
int32_t CheckIntra4x4PredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex);
int32_t CheckIntraNxNPredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex, bool b8x8);
/*!
* \brief check iPredMode for chroma eligible or not
* \param input : current iPredMode
* \param output: 0 indicating decoding correctly; -1 means error occurrence
* \param input : current iPredMode
* \param output: 0 indicating decoding correctly; -1 means error occurrence
*/
int32_t CheckIntraChromaPredMode (uint8_t uiSampleAvail, int8_t* pMode);
/*!
* \brief predict the mode of intra4x4
* \param input : current intra4x4 block index
* \param output: mode index
* \param input : current intra4x4 block index
* \param output: mode index
*/
int32_t PredIntra4x4Mode (int8_t* pIntraPredMode, int32_t iIdx4);
@ -107,10 +107,25 @@ int32_t WelsResidualBlockCavlc (SVlcTable* pVlcTable,
uint8_t uiQp,
PWelsDecoderContext pCtx);
// Transform8x8
/*!
 * \brief   declaration of the residual-block CAVLC routine used for the 8x8 transform case;
 *          it takes the same kind of arguments as WelsResidualBlockCavlc plus iIdx4x4.
 * NOTE(review): the definition is not visible from this header, so the parameter notes
 *               below are inferred from the names only -- confirm against the implementation.
 * \param   pVlcTable           VLC tables used while parsing
 * \param   pNonZeroCountCache  non-zero-count cache
 * \param   pBs                 bit-stream being read
 * \param   iIndex              block index (presumably into pNonZeroCountCache)
 * \param   iMaxNumCoeff        maximum number of coefficients to parse
 * \param   kpZigzagTable       scan table mapping coefficient order to block position
 * \param   iResidualProperty   residual category (one of the LUMA_x / CHROMA_x defines, presumably)
 * \param   pTCoeff             destination for the parsed coefficients
 * \param   iIdx4x4             presumably selects the 4x4 sub-block inside the 8x8 block -- TODO confirm
 * \param   uiQp                quantiser parameter
 * \param   pCtx                decoding context
 * \return  int32_t status code; NOTE(review): 0 on success is assumed from sibling routines -- confirm
 */
int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable,
uint8_t* pNonZeroCountCache,
PBitStringAux pBs,
/*int16_t* coeff_level,*/
int32_t iIndex,
int32_t iMaxNumCoeff,
const uint8_t* kpZigzagTable,
int32_t iResidualProperty,
/*short *tCoeffLevel,*/
int16_t* pTCoeff,
int32_t iIdx4x4,
uint8_t uiQp,
PWelsDecoderContext pCtx);
/*!
* \brief parsing inter info (including ref_index and pMvd)
* \param input : decoding context, current mb, bit-stream
* \param output: 0 indicating decoding correctly; -1 means error
* \param input : decoding context, current mb, bit-stream
* \param output: 0 indicating decoding correctly; -1 means error
*/
int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
PBitStringAux pBs);

View File

@ -78,6 +78,10 @@ int32_t RecI4x4Luma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLe
int32_t RecI4x4Chroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
int32_t RecI8x8Mb (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
int32_t RecI8x8Luma (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);

View File

@ -72,16 +72,19 @@ typedef int32_t SubMbType;
#define LUMA_DC_AC 3
#define CHROMA_DC 4
#define CHROMA_AC 5
#define CHROMA_DC_U 6
#define CHROMA_DC_V 7
#define CHROMA_AC_U 8
#define CHROMA_AC_V 9
#define LUMA_DC_AC_INTRA 10
#define LUMA_DC_AC_INTER 11
#define CHROMA_DC_U_INTER 12
#define CHROMA_DC_V_INTER 13
#define CHROMA_AC_U_INTER 14
#define CHROMA_AC_V_INTER 15
#define LUMA_DC_AC_8 6
#define CHROMA_DC_U 7
#define CHROMA_DC_V 8
#define CHROMA_AC_U 9
#define CHROMA_AC_V 10
#define LUMA_DC_AC_INTRA 11
#define LUMA_DC_AC_INTER 12
#define CHROMA_DC_U_INTER 13
#define CHROMA_DC_V_INTER 14
#define CHROMA_AC_U_INTER 15
#define CHROMA_AC_V_INTER 16
#define LUMA_DC_AC_INTRA_8 17
#define LUMA_DC_AC_INTER_8 18
#define SHIFT_BUFFER(pBitsCache) { pBitsCache->pBuf+=2; pBitsCache->uiRemainBits += 16; pBitsCache->uiCache32Bit |= (((pBitsCache->pBuf[2] << 8) | pBitsCache->pBuf[3]) << (32 - pBitsCache->uiRemainBits)); }
#define POP_BUFFER(pBitsCache, iCount) { pBitsCache->uiCache32Bit <<= iCount; pBitsCache->uiRemainBits -= iCount; }
@ -93,6 +96,38 @@ static const uint8_t g_kuiZigzagScan[16] = { //4*4block residual zig-zag scan or
7, 11, 14, 15,
};
// 8x8 block residual zig-zag scan order.
// Entry k is the raster position (row * 8 + col) of the k-th coefficient in scan order.
// The initialiser is laid out one anti-diagonal (row + col == constant) per line so the
// alternating walk direction is visible at a glance.
static const uint8_t g_kuiZigzagScan8x8[64] = {
  0,                                  // row + col == 0
  1,  8,                              // 1
  16,  9,  2,                         // 2
  3, 10, 17, 24,                      // 3
  32, 25, 18, 11,  4,                 // 4
  5, 12, 19, 26, 33, 40,              // 5
  48, 41, 34, 27, 20, 13,  6,         // 6
  7, 14, 21, 28, 35, 42, 49, 56,      // 7 (main anti-diagonal)
  57, 50, 43, 36, 29, 22, 15,         // 8
  23, 30, 37, 44, 51, 58,             // 9
  59, 52, 45, 38, 31,                 // 10
  39, 46, 53, 60,                     // 11
  61, 54, 47,                         // 12
  55, 62,                             // 13
  63,                                 // 14
};
// Maps a coefficient index inside an 8x8 block (0..63) to the context index used for its
// significant-coefficient flag; values are taken from Table 9-43, Page 289 of the standard.
// NOTE(review): the consumer of this table is outside this chunk -- presumably the CABAC
// 8x8 residual parser; confirm before changing any entry.
static const uint8_t g_kuiIdx2CtxSignificantCoeffFlag8x8[64] = { // Table 9-43, Page 289
0, 1, 2, 3, 4, 5, 5, 4,
4, 3, 3, 4, 4, 4, 5, 5,
4, 4, 4, 4, 3, 3, 6, 7,
7, 7, 8, 9, 10, 9, 8, 7,
7, 6, 11, 12, 13, 11, 6, 7,
8, 9, 14, 10, 9, 8, 6, 11,
12, 13, 11, 6, 9, 14, 10, 9,
11, 12, 13, 11 ,14, 10, 12, 14,
};
// Maps a coefficient index inside an 8x8 block (0..63) to the context index used for its
// last-significant-coefficient flag; values are taken from Table 9-43, Page 289 of the standard.
// The mapping is piecewise constant: index 0 -> 0, 1..15 -> 1, 16..31 -> 2, 32..39 -> 3,
// 40..47 -> 4, then runs of four entries for 5, 6, 7 and 8.
// NOTE(review): the consumer of this table is outside this chunk -- presumably the CABAC
// 8x8 residual parser; confirm before changing any entry.
static const uint8_t g_kuiIdx2CtxLastSignificantCoeffFlag8x8[64] = { // Table 9-43, Page 289
0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 6, 6, 6, 6,
7, 7, 7, 7, 8, 8, 8, 8,
};
static inline void GetMbResProperty(int32_t * pMBproperty,int32_t* pResidualProperty,bool bCavlc)
{
@ -142,8 +177,17 @@ static inline void GetMbResProperty(int32_t * pMBproperty,int32_t* pResidualProp
break;
case CHROMA_AC_V_INTER:
*pMBproperty = 5;
*pResidualProperty = bCavlc ?CHROMA_AC:CHROMA_AC_V;
*pResidualProperty = bCavlc ? CHROMA_AC : CHROMA_AC_V;
break;
// Reference to Table 7-2
case LUMA_DC_AC_INTRA_8:
*pMBproperty = 6;
*pResidualProperty = LUMA_DC_AC_8;
break;
case LUMA_DC_AC_INTER_8:
*pMBproperty = 7;
*pResidualProperty = LUMA_DC_AC_8;
break;
}
}

View File

@ -124,6 +124,7 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade
uiBsZero = pSrcRbsp[iIndex];
if (0 == uiBsZero) {
--iNalSize;
++ (*pConsumedBytes);
--iIndex;
} else {
break;
@ -991,16 +992,10 @@ int32_t ParseSps (PWelsDecoderContext pCtx, PBitStringAux pBsAux, int32_t* pPicW
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //seq_scaling_matrix_present_flag
pSps->bSeqScalingMatrixPresentFlag = !!uiCode;
if (pSps->bSeqScalingMatrixPresentFlag)// For high profile, it is not used in current application. FIXME
if (pSps->bSeqScalingMatrixPresentFlag) {
WELS_READ_VERIFY (ParseScalingList (pSps, pBs, 0, pSps->bSeqScalingListPresentFlag, pSps->iScalingList4x4,
pSps->iScalingList8x8));
//if exist, to parse scalinglist matrix value
// WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
// "ParseSps(): seq_scaling_matrix_present_flag (%d). Feature not supported.",
// pSps->bSeqScalingMatrixPresentFlag);
//return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_NON_BASELINE);
}
}
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //log2_max_frame_num_minus4
WELS_CHECK_SE_UPPER_ERROR (uiCode, SPS_LOG2_MAX_FRAME_NUM_MINUS4_MAX, "log2_max_frame_num_minus4",
@ -1379,28 +1374,27 @@ int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux,
pPps->bConstainedIntraPredFlag = !!uiCode;
WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //redundant_pic_cnt_present_flag
pPps->bRedundantPicCntPresentFlag = !!uiCode;
/*TODO: to judge whether going on to parse*/
//going on to parse high profile syntax, need fix me
if (0) {
WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode));
pPps->bTransform_8x8_mode_flag = !!uiCode;
WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode));
if (CheckMoreRBSPData (pBsAux)) {
WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //transform_8x8_mode_flag
pPps->bTransform8x8ModeFlag = !!uiCode;
WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //pic_scaling_matrix_present_flag
pPps->bPicScalingMatrixPresentFlag = !!uiCode;
if (pPps->bPicScalingMatrixPresentFlag) {
if (pCtx->bSpsAvailFlags[pPps->iSpsId])
if (pCtx->bSpsAvailFlags[pPps->iSpsId]) {
WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bPicScalingListPresentFlag,
pPps->iScalingList4x4, pPps->iScalingList8x8));
else {
} else {
pCtx->bSpsLatePps = true;
WELS_READ_VERIFY (ParseScalingList (NULL, pBsAux, 1, pPps->bPicScalingListPresentFlag, pPps->iScalingList4x4,
pPps->iScalingList8x8));
}
}
//add second chroma qp parsing process
WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //chroma_qp_index_offset,cr
pPps->iChromaQpIndexOffset[1] = iCode;
WELS_CHECK_SE_BOTH_ERROR (pPps->iChromaQpIndexOffset[1], PPS_CHROMA_QP_INDEX_OFFSET_MIN, PPS_CHROMA_QP_INDEX_OFFSET_MAX,
"second_chroma_qp_index_offset", GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET));
WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //second_chroma_qp_index_offset
pPps->iChromaQpIndexOffset[1] = iCode;
WELS_CHECK_SE_BOTH_ERROR (pPps->iChromaQpIndexOffset[1], PPS_CHROMA_QP_INDEX_OFFSET_MIN,
PPS_CHROMA_QP_INDEX_OFFSET_MAX, "chroma_qp_index_offset", GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS,
ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET));
}
if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) {
@ -1481,6 +1475,7 @@ int32_t SetScalingListValue (uint8_t* pScalingList, int iScalingListNum, bool* b
int iNextScale = 8;
int iDeltaScale;
int32_t iCode;
int32_t iIdx;
for (int j = 0; j < iScalingListNum; j++) {
if (iNextScale != 0) {
WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode));
@ -1492,8 +1487,9 @@ int32_t SetScalingListValue (uint8_t* pScalingList, int iScalingListNum, bool* b
if (*bUseDefaultScalingMatrixFlag)
break;
}
pScalingList[g_kuiZigzagScan[j]] = (iNextScale == 0) ? iLastScale : iNextScale;
iLastScale = pScalingList[g_kuiZigzagScan[j]];
iIdx = iScalingListNum == 16 ? g_kuiZigzagScan[j] : g_kuiZigzagScan8x8[j];
pScalingList[iIdx] = (iNextScale == 0) ? iLastScale : iNextScale;
iLastScale = pScalingList[iIdx];
}

View File

@ -74,7 +74,7 @@ int32_t InitCabacDecEngineFromBS (PWelsCabacDecEngine pDecEngine, PBitStringAux
uint8_t* pCurr;
pCurr = pBsAux->pCurBuf - iRemainingBytes;
if(pCurr >= (pBsAux->pEndBuf - 1)) {
if (pCurr >= (pBsAux->pEndBuf - 1)) {
return ERR_INFO_INVALID_ACCESS;
}
pDecEngine->uiOffset = ((pCurr[0] << 16) | (pCurr[1] << 8) | pCurr[2]);

View File

@ -136,6 +136,19 @@ static const uint8_t g_kuiTableBIdx[2][8] = {
},
};
// 4x4-block indices used by DeblockingBsMarginalMBAvcbase when a macroblock on the edge
// uses the 8x8 transform. The first index selects the edge (iEdge). Within a row, entries
// [0..7] are read for the current MB and entries [8..15] for the neighbouring MB; each is
// consumed in groups of four, one group per 8x8 block touching the edge.
// NOTE(review): row 0 looks like the left MB edge and row 1 the top MB edge -- confirm
// against the callers.
//
// 4x4 block numbering inside a macroblock, with the 8x8 block boundaries marked:
//    0  1 |  2  3
//    4  5 |  6  7
//   ------------
//    8  9 | 10 11
//   12 13 | 14 15
static const uint8_t g_kuiTableB8x8Idx[2][16] = {
{
0, 1, 4, 5, 8, 9, 12, 13, // current MB: blocks {0,1,4,5} and {8,9,12,13}
2, 3, 6, 7, 10, 11, 14, 15 // neighbour MB: blocks {2,3,6,7} and {10,11,14,15}
},
{
0, 1, 4, 5, 2, 3, 6, 7, // current MB: blocks {0,1,4,5} and {2,3,6,7}
8, 9, 12, 13, 10, 11, 14, 15 // neighbour MB: blocks {8,9,12,13} and {10,11,14,15}
},
};
#define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \
{\
tc[0] = g_kiTc0Table(iIndexA)[pBS[0]] + bChroma;\
@ -170,7 +183,22 @@ void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4],
nBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor;
nBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor;
* (uint32_t*)nBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor;
}
/*!
 * \brief   fill the boundary strengths of the two inner 8x8 edges of a macroblock
 *          from its per-4x4 non-zero table (8x8 transform variant)
 * \param   pNnzTab         per-4x4-block non-zero values, addressed through g_kuiMbCountScan4Idx
 * \param   nBS             output; only nBS[0][2][*] (vertical) and nBS[1][2][*] (horizontal) are written
 * \param   iLShiftFactor   left shift applied to every OR-ed value before it is stored
 */
void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
  // Collapse the table to one value per 8x8 block: OR of its four 4x4 entries.
  int8_t iNnz8x8[4];
  for (int32_t iBlk8x8 = 0; iBlk8x8 < 4; ++iBlk8x8) {
    const uint8_t* kpScan = &g_kuiMbCountScan4Idx[iBlk8x8 << 2];
    iNnz8x8[iBlk8x8] = (pNnzTab[kpScan[0]] | pNnzTab[kpScan[1]] |
                        pNnzTab[kpScan[2]] | pNnzTab[kpScan[3]]);
  }

  // Each strength covers half of an edge, so it is stored in two consecutive slots.
  const uint8_t kuiVer01 = (iNnz8x8[0] | iNnz8x8[1]) << iLShiftFactor;
  const uint8_t kuiVer23 = (iNnz8x8[2] | iNnz8x8[3]) << iLShiftFactor;
  const uint8_t kuiHor02 = (iNnz8x8[0] | iNnz8x8[2]) << iLShiftFactor;
  const uint8_t kuiHor13 = (iNnz8x8[1] | iNnz8x8[3]) << iLShiftFactor;

  //vertical
  nBS[0][2][1] = kuiVer01;
  nBS[0][2][0] = kuiVer01;
  nBS[0][2][3] = kuiVer23;
  nBS[0][2][2] = kuiVer23;

  //horizontal
  nBS[1][2][1] = kuiHor02;
  nBS[1][2][0] = kuiHor02;
  nBS[1][2][3] = kuiHor13;
  nBS[1][2][2] = kuiHor13;
}
void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
@ -179,73 +207,148 @@ void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS
int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
int8_t i8x8NnzTab[4];
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
for (int32_t i = 0; i < 4; i++) {
int32_t iBlkIdx = i << 2;
i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
}
//vertical
nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]);
nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
//horizontal
nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
} else {
uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
// horizontal
* (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
* (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
* (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
// horizontal
* (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);
* (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);
* (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
}
}
/*!
 * \brief   derive the four boundary-strength (BS) values along one outer edge of a macroblock,
 *          taking into account whether the current and/or the neighbouring MB use the 8x8 transform
 * \param   pCurDqLayer   layer providing pNzc, pRefIndex, pMv and pTransformSize8x8Flag
 * \param   iEdge         selects the row of g_kuiTableBIdx / g_kuiTableB8x8Idx to use
 * \param   iNeighMb      index of the neighbouring macroblock across the edge
 * \param   iMbXy         index of the current macroblock
 * \return  the four BS bytes (pBS[0..3]) viewed as one uint32_t in native byte order
 *
 * A BS of 2 is stored wherever either side of the edge has non-zero coefficients; otherwise
 * the value comes from MB_BS_MV.
 * NOTE(review): MB_BS_MV is defined outside this chunk -- presumably a reference/motion-vector
 * comparison; confirm.
 */
uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
  int32_t i, j;
  uint32_t uiBSx4;
  uint8_t* pBS = (uint8_t*) (&uiBSx4);
  // 4x4 granularity: one index per BS slot, current MB first, neighbour MB second
  const uint8_t* pBIdx     = &g_kuiTableBIdx[iEdge][0];
  const uint8_t* pBnIdx    = &g_kuiTableBIdx[iEdge][4];
  // 8x8 granularity: four indices per 8x8 block, current MB first, neighbour MB second
  const uint8_t* pB8x8Idx  = &g_kuiTableB8x8Idx[iEdge][0];
  const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];

  if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
    // both sides 8x8: one decision per 8x8 block pair, written to two BS slots
    for (i = 0; i < 2; i++) {
      uint8_t uiNzc = 0;
      for (j = 0; uiNzc == 0 && j < 4; j++) {
        uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
      }
      if (uiNzc) {
        pBS[i << 1] = pBS[1 + (i << 1)] = 2;
      } else {
        pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb,
                                          *pB8x8Idx, *pBn8x8Idx);
      }
      pB8x8Idx += 4;
      pBn8x8Idx += 4;
    }
  } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
    // current MB 8x8, neighbour 4x4: fold the current side per 8x8 block,
    // then test it against each of the two neighbouring 4x4 blocks
    for (i = 0; i < 2; i++) {
      uint8_t uiNzc = 0;
      for (j = 0; uiNzc == 0 && j < 4; j++) {
        uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
      }
      for (j = 0; j < 2; j++) {
        if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
          pBS[j + (i << 1)] = 2;
        } else {
          pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pB8x8Idx,
                                        *pBnIdx);
        }
        pBnIdx++;
      }
      pB8x8Idx += 4;
    }
  } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
    // neighbour 8x8, current MB 4x4: mirror image of the branch above
    for (i = 0; i < 2; i++) {
      uint8_t uiNzc = 0;
      for (j = 0; uiNzc == 0 && j < 4; j++) {
        uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
      }
      for (j = 0; j < 2; j++) {
        if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
          pBS[j + (i << 1)] = 2;
        } else {
          pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
                                        *pBn8x8Idx);
        }
        pBIdx++;
      }
      pBn8x8Idx += 4;
    }
  } else {
    // only 4x4 transform
    for (i = 0; i < 4; i++) {
      if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
        pBS[i] = 2;
      } else {
        pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
                           *pBnIdx);
      }
      pBIdx++;
      pBnIdx++;
    }
  }

  return uiBSx4;
}
int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc) {
@ -501,7 +604,7 @@ void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_
pFilter->iChromaQP[0] = pCurChromaQp[0];
pFilter->iChromaQP[1] = pCurChromaQp[1];
if (* (uint32_t*)nBS[0][1] != 0) {
if (* (uint32_t*)nBS[0][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
FilteringEdgeLumaV (pFilter, &pDestY[1 << 2], iLineSize, nBS[0][1]);
}
@ -510,7 +613,7 @@ void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_
FilteringEdgeChromaV (pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, nBS[0][2]);
}
if (* (uint32_t*)nBS[0][3] != 0) {
if (* (uint32_t*)nBS[0][3] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
FilteringEdgeLumaV (pFilter, &pDestY[3 << 2], iLineSize, nBS[0][3]);
}
@ -536,7 +639,7 @@ void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_
pFilter->iChromaQP[0] = pCurChromaQp[0];
pFilter->iChromaQP[1] = pCurChromaQp[1];
if (* (uint32_t*)nBS[1][1] != 0) {
if (* (uint32_t*)nBS[1][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
FilteringEdgeLumaH (pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, nBS[1][1]);
}
@ -546,7 +649,7 @@ void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_
nBS[1][2]);
}
if (* (uint32_t*)nBS[1][3] != 0) {
if (* (uint32_t*)nBS[1][3] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
FilteringEdgeLumaH (pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, nBS[1][3]);
}
}
@ -581,9 +684,16 @@ void FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int3
iBeta);
if (iAlpha | iBeta) {
TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 0);
pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
}
pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
}
}
// luma h
@ -594,9 +704,15 @@ void FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int3
pFilter->iLumaQP = iCurQp;
if (iAlpha | iBeta) {
pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
}
pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
}
}
}
void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) {
@ -705,6 +821,7 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
switch (iCurMbType) {
case MB_TYPE_INTRA4x4:
case MB_TYPE_INTRA8x8:
case MB_TYPE_INTRA16x16:
case MB_TYPE_INTRA_PCM:
DeblockingIntraMb (pCurDqLayer, pFilter, iBoundryFlag);
@ -728,7 +845,11 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
//SKIP MB_16x16 or others
if (iCurMbType != MB_TYPE_SKIP) {
if (iCurMbType == MB_TYPE_16x16) {
DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
} else {
DeblockingBSInsideMBAvsbase8x8 (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
}
} else {
DeblockingBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
}
@ -839,8 +960,8 @@ void DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) {
if (iCpu & WELS_CPU_SSSE3) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_ssse3;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;

View File

@ -76,6 +76,96 @@ void IdctResAddPred_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
}
}
// One 8-point 1-D pass of the 8x8 inverse transform.
// Reads eight int16_t inputs spaced kiSrcStep apart and writes eight outputs spaced
// kiDstStep apart. Every intermediate is held in int16_t, exactly as in the 2-D routine.
static inline void IdctRes8x8Pass1D_c (const int16_t* pSrc, const int32_t kiSrcStep, int16_t* pOut,
                                       const int32_t kiOutStep) {
  int16_t iIn[8];
  for (int k = 0; k < 8; k++) {
    iIn[k] = pSrc[k * kiSrcStep];
  }

  // even inputs (0, 2, 4, 6)
  const int16_t iEven0 = iIn[0] + iIn[4];
  const int16_t iEven1 = iIn[0] - iIn[4];
  const int16_t iEven2 = iIn[6] - (iIn[2] >> 1);
  const int16_t iEven3 = iIn[2] + (iIn[6] >> 1);

  const int16_t iMix0 = iEven0 + iEven3;
  const int16_t iMix2 = iEven1 - iEven2;
  const int16_t iMix4 = iEven1 + iEven2;
  const int16_t iMix6 = iEven0 - iEven3;

  // odd inputs (1, 3, 5, 7)
  const int16_t iOdd0 = -iIn[3] + iIn[5] - iIn[7] - (iIn[7] >> 1);
  const int16_t iOdd1 =  iIn[1] + iIn[7] - iIn[3] - (iIn[3] >> 1);
  const int16_t iOdd2 = -iIn[1] + iIn[7] + iIn[5] + (iIn[5] >> 1);
  const int16_t iOdd3 =  iIn[3] + iIn[5] + iIn[1] + (iIn[1] >> 1);

  const int16_t iMix1 = iOdd0 + (iOdd3 >> 2);
  const int16_t iMix3 = iOdd1 + (iOdd2 >> 2);
  const int16_t iMix5 = iOdd2 - (iOdd1 >> 2);
  const int16_t iMix7 = iOdd3 - (iOdd0 >> 2);

  // final butterfly
  pOut[0 * kiOutStep] = iMix0 + iMix7;
  pOut[1 * kiOutStep] = iMix2 - iMix5;
  pOut[2 * kiOutStep] = iMix4 + iMix3;
  pOut[3 * kiOutStep] = iMix6 + iMix1;
  pOut[4 * kiOutStep] = iMix6 - iMix1;
  pOut[5 * kiOutStep] = iMix4 - iMix3;
  pOut[6 * kiOutStep] = iMix2 + iMix5;
  pOut[7 * kiOutStep] = iMix0 - iMix7;
}

/*!
 * \brief   8x8 inverse transform of a residual block, added onto the prediction in place (C reference)
 * \param   pPred     8x8 prediction samples; overwritten with the reconstructed samples
 * \param   kiStride  distance in bytes between two consecutive rows of pPred
 * \param   pRs       64 residual coefficients, row-major (row i, column j at index j + (i << 3))
 *
 * The transform is separable: a horizontal pass over each row, then a vertical pass over each
 * column. The result is rounded with (x + 32) >> 6, added to the prediction and passed through
 * WelsClip1 (defined elsewhere; presumably clips to the 8-bit sample range).
 */
void IdctResAddPred8x8_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
  int16_t iRowPass[64];
  int16_t iResidual[64];

  // Horizontal: row iRow of pRs -> row iRow of iRowPass
  for (int iRow = 0; iRow < 8; iRow++) {
    IdctRes8x8Pass1D_c (pRs + (iRow << 3), 1, iRowPass + (iRow << 3), 1);
  }

  // Vertical: column iCol of iRowPass -> column iCol of iResidual
  for (int iCol = 0; iCol < 8; iCol++) {
    IdctRes8x8Pass1D_c (iRowPass + iCol, 8, iResidual + iCol, 8);
  }

  // Round, add the prediction and clip
  uint8_t* pDst = pPred;
  for (int iRow = 0; iRow < 8; iRow++) {
    for (int iCol = 0; iCol < 8; iCol++) {
      pDst[iRow * kiStride + iCol] = WelsClip1 (((32 + iResidual[ (iRow << 3) + iCol]) >> 6) + pDst[iRow * kiStride + iCol]);
    }
  }
}
void GetI4LumaIChromaAddrTable (int32_t* pBlockOffset, const int32_t kiYStride, const int32_t kiUVStride) {
int32_t* pOffset = pBlockOffset;
int32_t i;

View File

@ -144,7 +144,6 @@ int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
return 0;//NO_SUPPORTED_FILTER_IDX
} else {
WelsDeblockingFilterSlice (pCtx, pDeblockMb);
}
// any other filter_idc not supported here, 7/22/2010
@ -159,11 +158,22 @@ int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLa
WelsChromaDcIdct (pCurLayer->pScaledTCoeff[iMbXy] + 256); // 256 = 16*16
WelsChromaDcIdct (pCurLayer->pScaledTCoeff[iMbXy] + 320); // 320 = 16*16 + 16*4
for (i = 0; i < 16; i++) { //luma
iIndex = g_kuiMbCountScan4Idx[i];
if (pCurLayer->pNzc[iMbXy][iIndex]) {
iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
pCtx->pIdctResAddPredFunc (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4));
if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
for (i = 0; i < 4; i++) {
iIndex = g_kuiMbCountScan4Idx[i << 2];
if (pCurLayer->pNzc[iMbXy][iIndex] || pCurLayer->pNzc[iMbXy][iIndex + 1] || pCurLayer->pNzc[iMbXy][iIndex + 4]
|| pCurLayer->pNzc[iMbXy][iIndex + 5]) {
iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 6));
}
}
} else {
for (i = 0; i < 16; i++) { //luma
iIndex = g_kuiMbCountScan4Idx[i];
if (pCurLayer->pNzc[iMbXy][iIndex]) {
iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
pCtx->pIdctResAddPredFunc (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4));
}
}
}
@ -258,6 +268,10 @@ int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pC
return 0;
}
if (IS_INTRA8x8 (pCurLayer->pMbType[iMbXy])) {
RecI8x8Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
}
if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy]))
RecI4x4Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
@ -326,7 +340,7 @@ void WelsChromaDcIdct (int16_t* pBlock) {
pBlk[iStride1] = (iE - iB) >> 1;
}
void WelsMap4x4NeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
void WelsMapNxNNeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
if (pNeighAvail->iLeftAvail) { //left
pSampleAvail[ 6] =
pSampleAvail[12] =
@ -347,7 +361,7 @@ void WelsMap4x4NeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampl
}
}
void WelsMap4x4NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
void WelsMapNxNNeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) { //left
pSampleAvail[ 6] =
pSampleAvail[12] =
@ -401,7 +415,7 @@ int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail
uint8_t uiNeighAvail = 0;
uint32_t uiCode;
int32_t iCode;
pCtx->pMap4x4NeighToSampleFunc (pNeighAvail, iSampleAvail);
pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
for (i = 0; i < 16; i++) {
int32_t iPrevIntra4x4PredMode = 0;
@ -429,7 +443,7 @@ int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail
}
}
iFinalMode = CheckIntra4x4PredMode (&iSampleAvail[0], &iBestMode, i);
iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i, false);
if (iFinalMode == ERR_INVALID_INTRA4X4_MODE) {
return ERR_INFO_INVALID_I4x4_PRED_MODE;
}
@ -469,6 +483,87 @@ int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail
return ERR_NONE;
}
// Parses the prediction information of an Intra 8x8 macroblock.
//
// For each of the four 8x8 luma blocks the prediction mode is read from the
// bitstream (CABAC or CAVLC, selected by the PPS bEntropyCodingModeFlag),
// validated against the neighbour availability and stored in the current
// layer. Afterwards intra_chroma_pred_mode is parsed and validated, unless
// the sequence is monochrome.
//
// pCtx           - decoder context; supplies the neighbour-mapping function,
//                  the CABAC state and the active SPS.
// pNeighAvail    - availability/type of the neighbouring macroblocks.
// pIntraPredMode - prediction-mode cache; callers in this file pass a
//                  48-entry buffer filled by pFillInfoCacheIntraNxNFunc. It is
//                  read for the mode prediction and updated with the parsed
//                  modes.
// pBs            - bitstream reader used for the CAVLC path.
// pCurDqLayer    - current layer; receives pIntraNxNAvailFlag,
//                  pIntra4x4FinalMode, pIntraPredMode and pChromaPredMode for
//                  macroblock iMbXyIndex.
//
// Returns ERR_NONE on success, ERR_INFO_INVALID_I4x4_PRED_MODE when a luma
// mode is rejected by CheckIntraNxNPredMode, or
// ERR_INFO_INVALID_I_CHROMA_PRED_MODE when the chroma mode is out of range or
// rejected. Read failures leave the function through WELS_READ_VERIFY
// (presumably propagating the callee's error code - confirm against the macro).
int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
                           PBitStringAux pBs,
                           PDqLayer pCurDqLayer) {
  // Similar with Intra_4x4, can put them together when needed
  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
  int32_t iFinalMode, i;
  uint8_t uiNeighAvail = 0;
  uint32_t uiCode;
  int32_t iCode;

  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
  // Bit layout (MSB to LSB): Top-Right : Left : Top-Left : Top
  uiNeighAvail = (iSampleAvail[5] << 3) | (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
  // Kept per macroblock so that later stages can look the availability up again.
  pCurDqLayer->pIntraNxNAvailFlag[iMbXy] = uiNeighAvail;

  for (i = 0; i < 4; i++) { // one iteration per 8x8 luma block
    // CABAC: -1 means "use the predicted mode", otherwise the remaining mode.
    // CAVLC: the one-bit flag telling whether the predicted mode is used.
    int32_t iPrevIntra4x4PredMode = 0;
    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
      iPrevIntra4x4PredMode = iCode;
    } else {
      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
      iPrevIntra4x4PredMode = uiCode;
    }

    // The 8x8 block is addressed through its first 4x4 block (i << 2).
    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i << 2);
    int8_t iBestMode;
    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
      if (iPrevIntra4x4PredMode == -1)
        iBestMode = kiPredMode;
      else
        // The remaining mode skips over the predicted one.
        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
    } else {
      if (iPrevIntra4x4PredMode) {
        iBestMode = kiPredMode;
      } else {
        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
        // The remaining mode skips over the predicted one.
        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
      }
    }

    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i << 2, true);
    if (iFinalMode == ERR_INVALID_INTRA4X4_MODE) {
      return ERR_INFO_INVALID_I4x4_PRED_MODE;
    }

    // Replicate the 8x8 result over the four 4x4 entries it covers and mark
    // those positions as available for the blocks that follow.
    for (int j = 0; j < 4; j++) {
      pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[ (i << 2) + j]] = iFinalMode;
      pIntraPredMode[g_kuiScan8[ (i << 2) + j]] = iBestMode;
      iSampleAvail[g_kuiCache30ScanIdx[ (i << 2) + j]] = 1;
    }
  }

  // Save seven cache entries into the layer: four consecutive ones starting at
  // 1 + 8 * 4, then 4 + 8 * 1, 4 + 8 * 2 and 4 + 8 * 3 (presumably the bottom
  // row and right column used to predict neighbouring macroblocks - confirm).
  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];

  // intra_chroma_pred_mode is not present in the bitstream of monochrome
  // (4:0:0) sequences; reading it there would desynchronise the parser.
  if (pCtx->pSps->uiChromaFormatIdc == 0) {
    return ERR_NONE;
  }

  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
    }
    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
  } else {
    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
    }
    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
  }

  // The chroma mode must also be usable with the available neighbours.
  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
  }
  return ERR_NONE;
}
int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
PDqLayer pCurDqLayer) {
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
@ -519,6 +614,9 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
@ -539,8 +637,18 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
} else if (0 == uiMbType) { //I4x4
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
if (pCtx->pPps->bTransform8x8ModeFlag) {
// Parse transform_size_8x8_flag with CABAC
WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
}
if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
} else {
pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
}
//get uiCbp for I4x4
WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
pCurLayer->pCbp[iMbXy] = uiCbp;
@ -549,6 +657,8 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
uiCbpLuma = uiCbp & 15;
} else { //I16x16;
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
@ -565,7 +675,7 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
pCurLayer->pCbfDc[iMbXy] = 0;
if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
for (i = 0; i < 2; i++) {
pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
@ -608,26 +718,43 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui
ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
}
} else { //non-MB_TYPE_INTRA16x16
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
int32_t iIdx = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
pCurLayer->pLumaQp[iMbXy],
pCtx));
iIdx++;
if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
// Transform 8x8 support for CABAC
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8,
pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else {
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
int32_t iIdx = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
pCurLayer->pLumaQp[iMbXy], pCtx));
iIdx++;
}
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
}
ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
int32_t iMbResProperty;
//chroma
@ -730,10 +857,21 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
if (0 == uiMbType) { //Intra4x4
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
pCtx->pFillInfoCacheIntra4x4Func (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
if (pCtx->pPps->bTransform8x8ModeFlag) {
WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
}
if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
} else {
pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
}
} else { //Intra16x16
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
@ -761,6 +899,23 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
}
if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
// Needs modification when B pictures are added
bool bNeedParseTransformSize8x8Flag =
(((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
|| pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
&& (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
&& (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
&& ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
&& (pCtx->pPps->bTransform8x8ModeFlag));
if (bNeedParseTransformSize8x8Flag) {
WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
}
}
memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
int32_t iQpDelta, iId8x8, iId4x4;
@ -798,27 +953,46 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
}
} else { //non-MB_TYPE_INTRA16x16
iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
int32_t iIdx = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
pCurLayer->pLumaQp[iMbXy],
pCtx));
iIdx++;
if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
// Transform 8x8 support for CABAC
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else {
iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpLuma & (1 << iId8x8)) {
int32_t iIdx = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
pCurLayer->pLumaQp[iMbXy],
pCtx));
iIdx++;
}
} else {
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
}
ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
//chroma
@ -886,8 +1060,12 @@ int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
pCurLayer->pCbfDc[iMbXy] = 0;
pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
if (uiCode) {
int16_t pMv[2] = {0};
pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
@ -943,30 +1121,23 @@ int32_t WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
//if (!pCtx->pSps->bSeqScalingListPresentFlag[i]) {
if (!pCtx->pPps->bPicScalingListPresentFlag[i]) {
if (i < 6) {
if (i == 0 || i == 3)
memcpy (pCtx->pPps->iScalingList4x4[i], pCtx->pSps->iScalingList4x4[i], 16 * sizeof (uint8_t));
else
memcpy (pCtx->pPps->iScalingList4x4[i], pCtx->pPps->iScalingList4x4[i - 1], 16 * sizeof (uint8_t));
} else {
if (i == 6 || i == 7)
memcpy (pCtx->pPps->iScalingList8x8[ i - 6 ], pCtx->pSps->iScalingList8x8[ i - 6 ], 64 * sizeof (uint8_t));
else
memcpy (pCtx->pPps->iScalingList8x8[ i - 6 ], pCtx->pPps->iScalingList8x8[i - 8], 64 * sizeof (uint8_t));
}
}
}
}
//Init dequant coeff value for different QP
for (i = 0; i < 6; i++) {
pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i];
pCtx->pDequant_coeff8x8[i] = pCtx->pDequant_coeff_buffer8x8[i];
for (q = 0; q < 51; q++) {
for (x = 0; x < 16; x++) {
pCtx->pDequant_coeff4x4[i][q][x] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList4x4[i][x] *
@ -974,13 +1145,10 @@ int32_t WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
}
for (y = 0; y < 64; y++) {
pCtx->pDequant_coeff8x8[i][q][y] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList8x8[i][y] *
g_kuiDequantCoeff[q][x & 0x07] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiDequantCoeff[q][x &
0x07];//pseudo-code ,holding for 8x8transform into
g_kuiMatrixV[q % 6][y / 8][y % 8] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiMatrixV[q % 6][y / 8][y % 8];
}
}
}
pCtx->bDequantCoeff4x4Init = true;
pCtx->iDequantCoeffPpsid = pCtx->pPps->iPpsId;
}
@ -1027,12 +1195,12 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal
}
if (pSliceHeader->pPps->bConstainedIntraPredFlag) {
pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain1Intra4x4;
pCtx->pMap4x4NeighToSampleFunc = WelsMap4x4NeighToSampleConstrain1;
pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleConstrain1;
pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleConstrain1;
} else {
pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain0Intra4x4;
pCtx->pMap4x4NeighToSampleFunc = WelsMap4x4NeighToSampleNormal;
pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleNormal;
pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleNormal;
}
@ -1117,6 +1285,9 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
uiMbType = uiCode;
if (uiMbType > 25)
@ -1178,8 +1349,20 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
} else if (0 == uiMbType) { //reference to JM
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
if (pCtx->pPps->bTransform8x8ModeFlag) {
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
}
}
if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
} else {
pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
}
//uiCbp
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
@ -1199,6 +1382,8 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
uiCbpL = uiCbp & 15;
} else { //I_PCM exclude, we can ignore it
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
@ -1266,27 +1451,51 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
} else { //non-MB_TYPE_INTRA16x16
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart,
LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
return -1;//abnormal
if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
if (WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex,
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty,
pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, pCurLayer->pLumaQp[iMbXy], pCtx)) {
return -1;
}
iIndex++;
}
iIndex++;
} else {
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
} else {
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else {
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart,
LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
return -1;//abnormal
}
iIndex++;
}
} else {
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
}
ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
//chroma
@ -1399,6 +1608,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
int8_t iRefIndex[LIST_A][30];
pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType;
WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
if (ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) {
return -1;//abnormal
}
@ -1484,12 +1694,24 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
if (0 == uiMbType) {
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
if (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
return -1;
if (pCtx->pPps->bTransform8x8ModeFlag) {
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
}
}
if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
} else {
pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
}
} else { //I_PCM exclude, we can ignore it
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
@ -1510,7 +1732,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
return ERR_INFO_INVALID_CBP;
if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
return ERR_INFO_INVALID_CBP;
if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy]) {
if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) {
uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
} else //inter
@ -1520,6 +1742,20 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
pCurLayer->pCbp[iMbXy] = uiCbp;
uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
// Needs modification when B pictures are added
bool bNeedParseTransformSize8x8Flag =
(((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
|| pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
&& (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
&& (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
&& (uiCbpL > 0)
&& (pCtx->pPps->bTransform8x8ModeFlag));
if (bNeedParseTransformSize8x8Flag) {
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
}
}
ST32A4 (&pNzc[0], 0);
@ -1577,28 +1813,52 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
} else { //non-MB_TYPE_INTRA16x16
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart, iMbResProperty,
pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
return -1;//abnormal
if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
if (WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex,
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty,
pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, pCurLayer->pLumaQp[iMbXy], pCtx)) {
return -1;
}
iIndex++;
}
iIndex++;
} else {
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
} else {
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
} else { // Normal T4x4
for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
if (uiCbpL & (1 << iId8x8)) {
int32_t iIndex = (iId8x8 << 2);
for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
//Luma (DC and AC decoding together)
if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart, iMbResProperty,
pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
return -1;//abnormal
}
iIndex++;
}
} else {
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
}
ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
@ -1661,6 +1921,9 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
uint32_t uiCode;
pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
if (-1 == pSlice->iMbSkipRun) {
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
pSlice->iMbSkipRun = uiCode;

View File

@ -862,6 +862,21 @@ void AssignFuncPointerForRec (PWelsDecoderContext pCtx) {
pCtx->pGetI4x4LumaPredFunc[I4_PRED_HU ] = WelsI4x4LumaPredHU_c;
pCtx->pGetI4x4LumaPredFunc[I4_PRED_HD ] = WelsI4x4LumaPredHD_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_V ] = WelsI8x8LumaPredV_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_H ] = WelsI8x8LumaPredH_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC ] = WelsI8x8LumaPredDc_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_L ] = WelsI8x8LumaPredDcLeft_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_T ] = WelsI8x8LumaPredDcTop_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_128] = WelsI8x8LumaPredDcNA_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDL ] = WelsI8x8LumaPredDDL_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDL_TOP] = WelsI8x8LumaPredDDLTop_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDR ] = WelsI8x8LumaPredDDR_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_VL ] = WelsI8x8LumaPredVL_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_VL_TOP] = WelsI8x8LumaPredVLTop_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_VR ] = WelsI8x8LumaPredVR_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_HU ] = WelsI8x8LumaPredHU_c;
pCtx->pGetI8x8LumaPredFunc[I4_PRED_HD ] = WelsI8x8LumaPredHD_c;
pCtx->pGetIChromaPredFunc[C_PRED_DC ] = WelsIChromaPredDc_c;
pCtx->pGetIChromaPredFunc[C_PRED_H ] = WelsIChromaPredH_c;
pCtx->pGetIChromaPredFunc[C_PRED_V ] = WelsIChromaPredV_c;
@ -872,6 +887,8 @@ void AssignFuncPointerForRec (PWelsDecoderContext pCtx) {
pCtx->pIdctResAddPredFunc = IdctResAddPred_c;
pCtx->pIdctResAddPredFunc8x8 = IdctResAddPred8x8_c;
#if defined(HAVE_NEON)
if (pCtx->uiCpuFlag & WELS_CPU_NEON) {
pCtx->pIdctResAddPredFunc = IdctResAddPred_neon;
@ -931,7 +948,7 @@ void AssignFuncPointerForRec (PWelsDecoderContext pCtx) {
if (pCtx->uiCpuFlag & WELS_CPU_MMXEXT) {
pCtx->pIdctResAddPredFunc = IdctResAddPred_mmx;
/////////mmx code opt---
///////mmx code opt---
pCtx->pGetIChromaPredFunc[C_PRED_H] = WelsDecoderIChromaPredH_mmx;
pCtx->pGetIChromaPredFunc[C_PRED_V] = WelsDecoderIChromaPredV_mmx;
pCtx->pGetIChromaPredFunc[C_PRED_DC_L ] = WelsDecoderIChromaPredDcLeft_mmx;

View File

@ -1238,11 +1238,16 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
pCtx->sMb.pLumaQp[i] = (int8_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
"pCtx->sMb.pLumaQp[]");
pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = (bool*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
bool),
"pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
"pCtx->sMb.pTransformSize8x8Flag[]");
pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 2,
"pCtx->sMb.pChromaQp[]");
pCtx->sMb.pMvd[i][0] = (int16_t (*)[16][2])WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
pCtx->sMb.pCbfDc[i] = (uint8_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint8_t),
pCtx->sMb.pCbfDc[i] = (uint16_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t),
"pCtx->sMb.pCbfDc[]");
pCtx->sMb.pNzc[i] = (int8_t (*)[24])WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
"pCtx->sMb.pNzc[]");
@ -1255,6 +1260,8 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
"pCtx->sMb.pIntraPredMode[]");
pCtx->sMb.pIntra4x4FinalMode[i] = (int8_t (*)[MB_BLOCK4x4_NUM])WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pIntra4x4FinalMode[]");
pCtx->sMb.pIntraNxNAvailFlag[i] = (uint8_t (*))WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
"pCtx->sMb.pIntraNxNAvailFlag");
pCtx->sMb.pChromaPredMode[i] = (int8_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
"pCtx->sMb.pChromaPredMode[]");
pCtx->sMb.pCbp[i] = (int8_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
@ -1341,6 +1348,18 @@ void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
pCtx->sMb.pRefIndex[i][0] = NULL;
}
if (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) {
WelsFree (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i], "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = NULL;
}
if (pCtx->sMb.pTransformSize8x8Flag[i]) {
WelsFree (pCtx->sMb.pTransformSize8x8Flag[i], "pCtx->sMb.pTransformSize8x8Flag[]");
pCtx->sMb.pTransformSize8x8Flag[i] = NULL;
}
if (pCtx->sMb.pLumaQp[i]) {
WelsFree (pCtx->sMb.pLumaQp[i], "pCtx->sMb.pLumaQp[]");
@ -1393,6 +1412,12 @@ void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
pCtx->sMb.pIntra4x4FinalMode[i] = NULL;
}
if (pCtx->sMb.pIntraNxNAvailFlag[i]) {
WelsFree (pCtx->sMb.pIntraNxNAvailFlag[i], "pCtx->sMb.pIntraNxNAvailFlag");
pCtx->sMb.pIntraNxNAvailFlag[i] = NULL;
}
if (pCtx->sMb.pChromaPredMode[i]) {
WelsFree (pCtx->sMb.pChromaPredMode[i], "pCtx->sMb.pChromaPredMode[]");
@ -1989,7 +2014,7 @@ static inline void InitDqLayerInfo (PDqLayer pDqLayer, PLayerInfo pLayerInfo, PN
if (kuiQualityId == BASE_QUALITY_ID) {
pDqLayer->pRefPicListReordering = &pSh->pRefPicListReordering;
pDqLayer->pRefPicMarking = &pSh->sRefMarking;
if (pSh->pPps->bWeightedPredFlag) {
pDqLayer->bUseWeightPredictionFlag = true;
pDqLayer->pPredWeightTable = &pSh->sPredWeightTable;
@ -2029,6 +2054,8 @@ void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) {
pCurDq->pSliceIdc = pCtx->sMb.pSliceIdc[0];
pCurDq->pMv[0] = pCtx->sMb.pMv[0][0];
pCurDq->pRefIndex[0] = pCtx->sMb.pRefIndex[0][0];
pCurDq->pNoSubMbPartSizeLessThan8x8Flag = pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[0];
pCurDq->pTransformSize8x8Flag = pCtx->sMb.pTransformSize8x8Flag[0];
pCurDq->pLumaQp = pCtx->sMb.pLumaQp[0];
pCurDq->pChromaQp = pCtx->sMb.pChromaQp[0];
pCurDq->pMvd[0] = pCtx->sMb.pMvd[0][0];
@ -2038,6 +2065,7 @@ void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) {
pCurDq->pScaledTCoeff = pCtx->sMb.pScaledTCoeff[0];
pCurDq->pIntraPredMode = pCtx->sMb.pIntraPredMode[0];
pCurDq->pIntra4x4FinalMode = pCtx->sMb.pIntra4x4FinalMode[0];
pCurDq->pIntraNxNAvailFlag = pCtx->sMb.pIntraNxNAvailFlag[0];
pCurDq->pChromaPredMode = pCtx->sMb.pChromaPredMode[0];
pCurDq->pCbp = pCtx->sMb.pCbp[0];
pCurDq->pSubMbType = pCtx->sMb.pSubMbType[0];

View File

@ -380,6 +380,507 @@ void WelsI4x4LumaPredHD_c (uint8_t* pPred, const int32_t kiStride) {
ST32A4 (pPred + kiStride3, LD32 (kuiList));
}
// Intra 8x8 luma prediction, vertical mode (plain C version).
// The 8 samples above the block are smoothed with a (1,2,1)/4 filter and the
// resulting row is written to all 8 rows of the block.
//   pPred    - top-left sample of the 8x8 block; the row at pPred - kiStride is read
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] is used to smooth the first top sample
//   bTRAvail - when true, pPred[8 - kiStride] is used to smooth the last top sample
void WelsI8x8LumaPredV_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride; // row directly above the block
  uint8_t uiFilteredTop[8];
  uint64_t uiPackedRow = 0;
  int32_t i;

  // Edge samples fall back to a (3,1)/4 resp. (1,3)/4 filter when the outer neighbour is missing.
  uiFilteredTop[0] = bTLAvail ? ((pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2) : ((pTop[0] * 3 + pTop[1] + 2) >> 2);
  for (i = 1; i < 7; i++) {
    uiFilteredTop[i] = ((pTop[i - 1] + (pTop[i] << 1) + pTop[i + 1] + 2) >> 2);
  }
  uiFilteredTop[7] = bTRAvail ? ((pTop[6] + (pTop[7] << 1) + pTop[8] + 2) >> 2) : ((pTop[6] + pTop[7] * 3 + 2) >> 2);

  // 8-89
  // Pack the 8 filtered samples into one 64-bit word, uiFilteredTop[0] in the least-significant byte.
  for (i = 7; i >= 0; i--) {
    uiPackedRow = ((uiPackedRow << 8) | uiFilteredTop[i]);
  }

  // NOTE(review): ST64A8 is defined elsewhere; presumably an 8-byte aligned 64-bit store - confirm.
  for (i = 0; i < 8; i++) {
    ST64A8 (pPred + kiStride * i, uiPackedRow);
  }
}
// Intra 8x8 luma prediction, horizontal mode (plain C version).
// The 8 samples left of the block are smoothed with a (1,2,1)/4 filter; every
// row of the block is then filled with the smoothed sample of that row.
//   pPred    - top-left sample of the 8x8 block; column pPred[-1 + y * kiStride] is read
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first left sample
//   bTRAvail - not used by this mode
void WelsI8x8LumaPredH_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pLeft = pPred - 1; // pLeft[y * kiStride] is the neighbour left of row y
  uint8_t uiSmoothed[8];
  int32_t iRow;

  if (bTLAvail) {
    uiSmoothed[0] = (pLeft[-kiStride] + (pLeft[0] << 1) + pLeft[kiStride] + 2) >> 2;
  } else {
    uiSmoothed[0] = (pLeft[0] * 3 + pLeft[kiStride] + 2) >> 2;
  }
  for (iRow = 1; iRow < 7; iRow++) {
    const uint8_t* pCur = pLeft + iRow * kiStride;
    uiSmoothed[iRow] = (pCur[-kiStride] + (pCur[0] << 1) + pCur[kiStride] + 2) >> 2;
  }
  // The last sample has no neighbour below, hence the (1,3)/4 filter.
  uiSmoothed[7] = (pLeft[6 * kiStride] + pLeft[7 * kiStride] * 3 + 2) >> 2;

  // 8-90
  for (iRow = 0; iRow < 8; iRow++) {
    // Multiplying by 0x0101... replicates the byte into all 8 byte lanes.
    const uint64_t kuiLine = 0x0101010101010101U * uiSmoothed[iRow];
    ST64A8 (pPred + iRow * kiStride, kuiLine);
  }
}
// Intra 8x8 luma prediction, DC mode with both top and left neighbours (plain C version).
// The 8 top and 8 left neighbours are smoothed with a (1,2,1)/4 filter and the
// rounded mean of the 16 smoothed samples fills the whole block.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first top/left samples
//   bTRAvail - when true, pPred[8 - kiStride] takes part in smoothing the last top sample
void WelsI8x8LumaPredDc_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  const uint8_t* pLeft = pPred - 1;
  int32_t iSum = 0;
  int32_t k;

  // Each smoothed sample is at most 255, so summing the terms directly equals
  // storing them in 8-bit temporaries first.
  if (bTLAvail) {
    iSum += (pTop[-1] + (pLeft[0] << 1) + pLeft[kiStride] + 2) >> 2;
    iSum += (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  } else {
    iSum += (pLeft[0] * 3 + pLeft[kiStride] + 2) >> 2;
    iSum += (pTop[0] * 3 + pTop[1] + 2) >> 2;
  }
  for (k = 1; k < 7; k++) {
    const uint8_t* pCur = pLeft + k * kiStride;
    iSum += (pCur[-kiStride] + (pCur[0] << 1) + pCur[kiStride] + 2) >> 2;
    iSum += (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  iSum += (pLeft[6 * kiStride] + pLeft[7 * kiStride] * 3 + 2) >> 2;
  iSum += bTRAvail ? ((pTop[6] + (pTop[7] << 1) + pTop[8] + 2) >> 2) : ((pTop[6] + pTop[7] * 3 + 2) >> 2);

  // 8-91
  const uint8_t kuiDc = (uint8_t) ((iSum + 8) >> 4);
  const uint64_t kuiDcRow = 0x0101010101010101U * kuiDc;
  for (k = 0; k < 8; k++) {
    ST64A8 (pPred + k * kiStride, kuiDcRow);
  }
}
// Intra 8x8 luma prediction, DC mode using the left neighbours only (plain C version).
// The 8 left neighbours are smoothed with a (1,2,1)/4 filter and their rounded
// mean fills the whole block.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first left sample
//   bTRAvail - not used by this mode
void WelsI8x8LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pLeft = pPred - 1;
  int32_t iSum;
  int32_t k;

  // Every smoothed sample is <= 255, so no intermediate 8-bit storage is needed.
  if (bTLAvail) {
    iSum = (pLeft[-kiStride] + (pLeft[0] << 1) + pLeft[kiStride] + 2) >> 2;
  } else {
    iSum = (pLeft[0] * 3 + pLeft[kiStride] + 2) >> 2;
  }
  for (k = 1; k < 7; k++) {
    const uint8_t* pCur = pLeft + k * kiStride;
    iSum += (pCur[-kiStride] + (pCur[0] << 1) + pCur[kiStride] + 2) >> 2;
  }
  iSum += (pLeft[6 * kiStride] + pLeft[7 * kiStride] * 3 + 2) >> 2;

  // 8-92
  const uint8_t kuiDc = (uint8_t) ((iSum + 4) >> 3);
  const uint64_t kuiDcRow = 0x0101010101010101U * kuiDc;
  for (k = 0; k < 8; k++) {
    ST64A8 (pPred + k * kiStride, kuiDcRow);
  }
}
// Intra 8x8 luma prediction, DC mode using the top neighbours only (plain C version).
// The 8 top neighbours are smoothed with a (1,2,1)/4 filter and their rounded
// mean fills the whole block.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first top sample
//   bTRAvail - when true, pPred[8 - kiStride] takes part in smoothing the last top sample
void WelsI8x8LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  int32_t iSum;
  int32_t k;

  // Every smoothed sample is <= 255, so no intermediate 8-bit storage is needed.
  if (bTLAvail) {
    iSum = (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  } else {
    iSum = (pTop[0] * 3 + pTop[1] + 2) >> 2;
  }
  for (k = 1; k < 7; k++) {
    iSum += (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  if (bTRAvail) {
    iSum += (pTop[6] + (pTop[7] << 1) + pTop[8] + 2) >> 2;
  } else {
    iSum += (pTop[6] + pTop[7] * 3 + 2) >> 2;
  }

  // 8-93
  const uint8_t kuiDc = (uint8_t) ((iSum + 4) >> 3);
  const uint64_t kuiDcRow = 0x0101010101010101U * kuiDc;
  for (k = 0; k < 8; k++) {
    ST64A8 (pPred + k * kiStride, kuiDcRow);
  }
}
// Intra 8x8 luma prediction, DC mode when no neighbour is usable (plain C version).
// Fills the 8x8 block with the constant 0x80; no neighbouring sample is read.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail, bTRAvail - not used by this mode
void WelsI8x8LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  // for normal 8 bit depth, 8-94
  const uint64_t kuiMidGreyRow = 0x8080808080808080U;
  int32_t iRow;

  for (iRow = 0; iRow < 8; iRow++) {
    ST64A8 (pPred + iRow * kiStride, kuiMidGreyRow);
  }
}
/*down left*/
// Intra 8x8 luma prediction, diagonal down-left mode, top and top-right both available.
// Reads the 16 samples pPred[0..15 - kiStride] (plus the top-left one when bTLAvail),
// smooths them with a (1,2,1)/4 filter and predicts every sample from the smoothed
// row; the predicted value depends only on x + y.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first top sample
//   bTRAvail - not read here; the top-right samples are accessed unconditionally
void WelsI8x8LumaPredDDL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  uint8_t uiTopF[16];  // smoothed top / top-right row
  uint8_t uiDiag[15];  // prediction value for each anti-diagonal x + y = 0..14
  int32_t k, x, y;

  if (bTLAvail) {
    uiTopF[0] = (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  } else {
    uiTopF[0] = (pTop[0] * 3 + pTop[1] + 2) >> 2;
  }
  for (k = 1; k < 15; k++) {
    uiTopF[k] = (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  uiTopF[15] = (pTop[14] + pTop[15] * 3 + 2) >> 2;

  // 8-96: all anti-diagonals except the last one
  for (k = 0; k < 14; k++) {
    uiDiag[k] = (uiTopF[k] + (uiTopF[k + 1] << 1) + uiTopF[k + 2] + 2) >> 2;
  }
  // 8-95: bottom-right sample (x == 7 && y == 7)
  uiDiag[14] = (uiTopF[14] + 3 * uiTopF[15] + 2) >> 2;

  for (y = 0; y < 8; y++) {
    uint8_t* pRow = pPred + y * kiStride;
    for (x = 0; x < 8; x++) {
      pRow[x] = uiDiag[x + y];
    }
  }
}
/*down left*/
// Intra 8x8 luma prediction, diagonal down-left mode, top available but top-right not.
// Only pPred[0..7 - kiStride] (plus the top-left sample when bTLAvail) are read; the
// missing top-right samples are replaced by the last top sample before prediction.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first top sample
//   bTRAvail - not read here
void WelsI8x8LumaPredDDLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  uint8_t uiTopF[16];  // smoothed top row, padded to 16 entries
  uint8_t uiDiag[15];  // prediction value for each anti-diagonal x + y = 0..14
  int32_t k, x, y;

  if (bTLAvail) {
    uiTopF[0] = (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  } else {
    uiTopF[0] = (pTop[0] * 3 + pTop[1] + 2) >> 2;
  }
  for (k = 1; k < 7; k++) {
    uiTopF[k] = (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  // p[x, -1] x=8...15 are replaced with p[7, -1]
  uiTopF[7] = (pTop[6] + pTop[7] * 3 + 2) >> 2;
  for (k = 8; k < 16; k++) {
    uiTopF[k] = pTop[7];
  }

  // 8-96: all anti-diagonals except the last one
  for (k = 0; k < 14; k++) {
    uiDiag[k] = (uiTopF[k] + (uiTopF[k + 1] << 1) + uiTopF[k + 2] + 2) >> 2;
  }
  // 8-95: bottom-right sample (x == 7 && y == 7)
  uiDiag[14] = (uiTopF[14] + 3 * uiTopF[15] + 2) >> 2;

  for (y = 0; y < 8; y++) {
    uint8_t* pRow = pPred + y * kiStride;
    for (x = 0; x < 8; x++) {
      pRow[x] = uiDiag[x + y];
    }
  }
}
/*down right*/
// Intra 8x8 luma prediction, diagonal down-right mode (plain C version).
// The TopLeft, Top, Left are all available under this mode.
// The left column, the corner and the top row are smoothed with a (1,2,1)/4 filter
// and laid out as one continuous edge; each predicted sample is a (1,2,1)/4 blend of
// that edge centred at x - y (equations 8-97, 8-98, 8-99).
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - not read here; the corner sample is accessed unconditionally
//   bTRAvail - when true, pPred[8 - kiStride] takes part in smoothing the last top sample
void WelsI8x8LumaPredDDR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  const uint8_t* pLeft = pPred - 1;
  // uiEdge[0..7]  : smoothed left samples, row 7 down to row 0
  // uiEdge[8]     : smoothed top-left corner
  // uiEdge[9..16] : smoothed top samples, column 0 up to column 7
  uint8_t uiEdge[17];
  int32_t k, x, y;

  uiEdge[8] = (pLeft[0] + (pTop[-1] << 1) + pTop[0] + 2) >> 2;
  uiEdge[7] = (pTop[-1] + (pLeft[0] << 1) + pLeft[kiStride] + 2) >> 2;
  uiEdge[9] = (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  for (k = 1; k < 7; k++) {
    const uint8_t* pCur = pLeft + k * kiStride;
    uiEdge[7 - k] = (pCur[-kiStride] + (pCur[0] << 1) + pCur[kiStride] + 2) >> 2;
    uiEdge[9 + k] = (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  uiEdge[0] = (pLeft[6 * kiStride] + pLeft[7 * kiStride] * 3 + 2) >> 2;
  if (bTRAvail) {
    uiEdge[16] = (pTop[6] + (pTop[7] << 1) + pTop[8] + 2) >> 2;
  } else {
    uiEdge[16] = (pTop[6] + pTop[7] * 3 + 2) >> 2;
  }

  for (y = 0; y < 8; y++) {
    uint8_t* pRow = pPred + y * kiStride;
    for (x = 0; x < 8; x++) {
      const int32_t kiCentre = 8 + x - y; // 1..15, so kiCentre +/- 1 stays inside uiEdge
      pRow[x] = (uiEdge[kiCentre - 1] + (uiEdge[kiCentre] << 1) + uiEdge[kiCentre + 1] + 2) >> 2;
    }
  }
}
/*vertical left*/
// Intra 8x8 luma prediction, vertical-left mode, top and top-right both available.
// Reads the 16 samples pPred[0..15 - kiStride] (plus the top-left one when bTLAvail),
// smooths them with a (1,2,1)/4 filter, then predicts even rows with a 2-tap average
// and odd rows with a (1,2,1)/4 blend, shifting one sample right every two rows.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first top sample
//   bTRAvail - not read here; the top-right samples are accessed unconditionally
void WelsI8x8LumaPredVL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  uint8_t uiTopF[16];
  int32_t k, x, y;

  if (bTLAvail) {
    uiTopF[0] = (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  } else {
    uiTopF[0] = (pTop[0] * 3 + pTop[1] + 2) >> 2;
  }
  for (k = 1; k < 15; k++) {
    uiTopF[k] = (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  uiTopF[15] = (pTop[14] + pTop[15] * 3 + 2) >> 2;

  for (y = 0; y < 8; y++) {
    uint8_t* pRow = pPred + y * kiStride;
    const uint8_t* pEdge = uiTopF + (y >> 1); // window start for this row
    if (y & 0x01) { // 8-109
      for (x = 0; x < 8; x++) {
        pRow[x] = (pEdge[x] + (pEdge[x + 1] << 1) + pEdge[x + 2] + 2) >> 2;
      }
    } else { // 8-108
      for (x = 0; x < 8; x++) {
        pRow[x] = (pEdge[x] + pEdge[x + 1] + 1) >> 1;
      }
    }
  }
}
/*vertical left*/
// Intra 8x8 luma prediction, vertical-left mode, top available but top-right not.
// Only pPred[0..7 - kiStride] (plus the top-left sample when bTLAvail) are read; the
// missing top-right samples are replaced by the last top sample before prediction.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first top sample
//   bTRAvail - not read here
void WelsI8x8LumaPredVLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  uint8_t uiTopF[16];
  int32_t k, x, y;

  if (bTLAvail) {
    uiTopF[0] = (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  } else {
    uiTopF[0] = (pTop[0] * 3 + pTop[1] + 2) >> 2;
  }
  for (k = 1; k < 7; k++) {
    uiTopF[k] = (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  // p[x, -1] x=8...15 are replaced with p[7, -1]
  uiTopF[7] = (pTop[6] + pTop[7] * 3 + 2) >> 2;
  for (k = 8; k < 16; k++) {
    uiTopF[k] = pTop[7];
  }

  for (y = 0; y < 8; y++) {
    uint8_t* pRow = pPred + y * kiStride;
    const uint8_t* pEdge = uiTopF + (y >> 1); // window start for this row
    if (y & 0x01) { // 8-109
      for (x = 0; x < 8; x++) {
        pRow[x] = (pEdge[x] + (pEdge[x + 1] << 1) + pEdge[x + 2] + 2) >> 2;
      }
    } else { // 8-108
      for (x = 0; x < 8; x++) {
        pRow[x] = (pEdge[x] + pEdge[x + 1] + 1) >> 1;
      }
    }
  }
}
/*vertical right*/
// Intra 8x8 luma prediction, vertical-right mode (plain C version).
// The TopLeft, Top, Left are always available under this mode.
// The left column, the corner and the top row are smoothed with a (1,2,1)/4 filter
// and laid out as one continuous edge. With zVR = 2 * x - y:
//   zVR >= 0, even : 2-tap average of two adjacent edge samples      (8-100)
//   zVR >  0, odd  : (1,2,1)/4 blend centred on the top part         (8-101)
//   zVR <  0       : (1,2,1)/4 blend centred on corner / left part   (8-102, 8-103)
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - not read here; the corner sample is accessed unconditionally
//   bTRAvail - when true, pPred[8 - kiStride] takes part in smoothing the last top sample
void WelsI8x8LumaPredVR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  const uint8_t* pLeft = pPred - 1;
  // uiEdge[0..7]  : smoothed left samples, row 7 down to row 0
  // uiEdge[8]     : smoothed top-left corner
  // uiEdge[9..16] : smoothed top samples, column 0 up to column 7
  uint8_t uiEdge[17];
  int32_t k, x, y;

  uiEdge[8] = (pLeft[0] + (pTop[-1] << 1) + pTop[0] + 2) >> 2;
  uiEdge[7] = (pTop[-1] + (pLeft[0] << 1) + pLeft[kiStride] + 2) >> 2;
  uiEdge[9] = (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  for (k = 1; k < 7; k++) {
    const uint8_t* pCur = pLeft + k * kiStride;
    uiEdge[7 - k] = (pCur[-kiStride] + (pCur[0] << 1) + pCur[kiStride] + 2) >> 2;
    uiEdge[9 + k] = (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  uiEdge[0] = (pLeft[6 * kiStride] + pLeft[7 * kiStride] * 3 + 2) >> 2;
  if (bTRAvail) {
    uiEdge[16] = (pTop[6] + (pTop[7] << 1) + pTop[8] + 2) >> 2;
  } else {
    uiEdge[16] = (pTop[6] + pTop[7] * 3 + 2) >> 2;
  }

  for (y = 0; y < 8; y++) {
    uint8_t* pRow = pPred + y * kiStride;
    for (x = 0; x < 8; x++) {
      const int32_t kiZ = (x << 1) - y;   // 2 * x - y
      const int32_t kiDiv = x - (y >> 1);
      if (kiZ >= 0 && (kiZ & 0x01) == 0) {
        // 8-100: kiDiv is 0..7 here, so the pair is uiEdge[8..15] / uiEdge[9..16]
        const int32_t kiFirst = 8 + kiDiv;
        pRow[x] = (uiEdge[kiFirst] + uiEdge[kiFirst + 1] + 1) >> 1;
      } else {
        // 8-101 (kiZ odd and positive) centres on the top part at kiDiv;
        // 8-102 / 8-103 (kiZ negative) centre on kiZ + 1, i.e. the corner or the left part.
        const int32_t kiCentre = 8 + ((kiZ > 0) ? kiDiv : (kiZ + 1));
        pRow[x] = (uiEdge[kiCentre - 1] + (uiEdge[kiCentre] << 1) + uiEdge[kiCentre + 1] + 2) >> 2;
      }
    }
  }
}
/*horizontal up*/
// Intra 8x8 luma prediction, horizontal-up mode (plain C version).
// The 8 left neighbours are smoothed with a (1,2,1)/4 filter. The predicted value
// depends only on zHU = x + 2 * y, so it is tabulated once per zHU and copied out.
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - when true, pPred[-1 - kiStride] takes part in smoothing the first left sample
//   bTRAvail - not used by this mode
void WelsI8x8LumaPredHU_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pLeft = pPred - 1;
  uint8_t uiLeftF[8];
  uint8_t uiByZ[22]; // prediction for zHU = 0..21 (max is 7 + 2 * 7)
  int32_t k, x, y;

  if (bTLAvail) {
    uiLeftF[0] = (pLeft[-kiStride] + (pLeft[0] << 1) + pLeft[kiStride] + 2) >> 2;
  } else {
    uiLeftF[0] = (pLeft[0] * 3 + pLeft[kiStride] + 2) >> 2;
  }
  for (k = 1; k < 7; k++) {
    const uint8_t* pCur = pLeft + k * kiStride;
    uiLeftF[k] = (pCur[-kiStride] + (pCur[0] << 1) + pCur[kiStride] + 2) >> 2;
  }
  uiLeftF[7] = (pLeft[6 * kiStride] + pLeft[7 * kiStride] * 3 + 2) >> 2;

  for (k = 0; k < 13; k++) {
    const int32_t kiHalf = k >> 1;
    if (k & 0x01) { // 8-111
      uiByZ[k] = (uiLeftF[kiHalf] + (uiLeftF[kiHalf + 1] << 1) + uiLeftF[kiHalf + 2] + 2) >> 2;
    } else { // 8-110
      uiByZ[k] = (uiLeftF[kiHalf] + uiLeftF[kiHalf + 1] + 1) >> 1;
    }
  }
  uiByZ[13] = (uiLeftF[6] + 3 * uiLeftF[7] + 2) >> 2; // 8-112
  for (k = 14; k < 22; k++) { // 8-113
    uiByZ[k] = uiLeftF[7];
  }

  for (y = 0; y < 8; y++) {
    uint8_t* pRow = pPred + y * kiStride;
    for (x = 0; x < 8; x++) {
      pRow[x] = uiByZ[x + (y << 1)]; // x + 2 * y
    }
  }
}
/*horizontal down*/
// Intra 8x8 luma prediction, horizontal-down mode (plain C version).
// The TopLeft, Top, Left are all available under this mode.
// The left column, the corner and the top row are smoothed with a (1,2,1)/4 filter
// and laid out as one continuous edge. With zHD = 2 * y - x:
//   zHD >= 0, even : 2-tap average of two adjacent edge samples      (8-104)
//   zHD >  0, odd  : (1,2,1)/4 blend centred on the left part        (8-105)
//   zHD <  0       : (1,2,1)/4 blend centred on corner / top part    (8-106, 8-107)
//   pPred    - top-left sample of the 8x8 block
//   kiStride - distance in bytes between two vertically adjacent samples
//   bTLAvail - not read here; the corner sample is accessed unconditionally
//   bTRAvail - when true, pPred[8 - kiStride] is read while smoothing the last top sample
void WelsI8x8LumaPredHD_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
  const uint8_t* pTop = pPred - kiStride;
  const uint8_t* pLeft = pPred - 1;
  // uiEdge[0..7]  : smoothed left samples, row 7 down to row 0
  // uiEdge[8]     : smoothed top-left corner
  // uiEdge[9..16] : smoothed top samples, column 0 up to column 7
  uint8_t uiEdge[17];
  int32_t k, x, y;

  uiEdge[8] = (pLeft[0] + (pTop[-1] << 1) + pTop[0] + 2) >> 2;
  uiEdge[7] = (pTop[-1] + (pLeft[0] << 1) + pLeft[kiStride] + 2) >> 2;
  uiEdge[9] = (pTop[-1] + (pTop[0] << 1) + pTop[1] + 2) >> 2;
  for (k = 1; k < 7; k++) {
    const uint8_t* pCur = pLeft + k * kiStride;
    uiEdge[7 - k] = (pCur[-kiStride] + (pCur[0] << 1) + pCur[kiStride] + 2) >> 2;
    uiEdge[9 + k] = (pTop[k - 1] + (pTop[k] << 1) + pTop[k + 1] + 2) >> 2;
  }
  uiEdge[0] = (pLeft[6 * kiStride] + pLeft[7 * kiStride] * 3 + 2) >> 2;
  // The last smoothed top sample is computed as in the sibling modes, although no
  // prediction formula below indexes it.
  if (bTRAvail) {
    uiEdge[16] = (pTop[6] + (pTop[7] << 1) + pTop[8] + 2) >> 2;
  } else {
    uiEdge[16] = (pTop[6] + pTop[7] * 3 + 2) >> 2;
  }

  for (y = 0; y < 8; y++) {
    uint8_t* pRow = pPred + y * kiStride;
    for (x = 0; x < 8; x++) {
      const int32_t kiZ = (y << 1) - x;   // 2 * y - x
      const int32_t kiDiv = y - (x >> 1);
      if (kiZ >= 0 && (kiZ & 0x01) == 0) {
        // 8-104: kiDiv is 0..7 here, so the pair is uiEdge[8..1] / uiEdge[7..0]
        const int32_t kiFirst = 8 - kiDiv;
        pRow[x] = (uiEdge[kiFirst] + uiEdge[kiFirst - 1] + 1) >> 1;
      } else {
        // 8-105 (kiZ odd and positive) centres on the left part at -kiDiv;
        // 8-106 / 8-107 (kiZ negative) centre on -kiZ - 1, i.e. the corner or the top part.
        const int32_t kiCentre = 8 + ((kiZ > 0) ? -kiDiv : (-kiZ - 1));
        pRow[x] = (uiEdge[kiCentre - 1] + (uiEdge[kiCentre] << 1) + uiEdge[kiCentre + 1] + 2) >> 2;
      }
    }
  }
}
void WelsIChromaPredV_c (uint8_t* pPred, const int32_t kiStride) {
const uint64_t kuiVal64 = LD64A8 (&pPred[-kiStride]);
const int32_t kiStride2 = kiStride << 1;

View File

@ -127,7 +127,7 @@ int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc) {
|| (ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->eErrorConMethod)
|| (ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->eErrorConMethod)
|| (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->eErrorConMethod))
&& (NULL != pCtx->pPreviousDecodedPictureInDpb);
&& (NULL != pCtx->pPreviousDecodedPictureInDpb);
bCopyPrevious = bCopyPrevious && (pRef->iWidthInPixel == pCtx->pPreviousDecodedPictureInDpb->iWidthInPixel)
&& (pRef->iHeightInPixel == pCtx->pPreviousDecodedPictureInDpb->iHeightInPixel);

View File

@ -35,13 +35,14 @@
#include "error_code.h"
namespace WelsDec {
#define IDX_UNUSED -1
static const int16_t g_kMaxPos [] = {IDX_UNUSED, 15, 14, 15, 3, 14, 3, 3, 14, 14};
static const int16_t g_kMaxC2 [] = {IDX_UNUSED, 4, 4, 4, 3, 4, 3, 3, 4, 4};
static const int16_t g_kBlockCat2CtxOffsetCBF[] = {IDX_UNUSED, 0, 4, 8, 12, 16, 12, 12, 16, 16};
static const int16_t g_kBlockCat2CtxOffsetMap [] = {IDX_UNUSED, 0, 15, 29, 44, 47, 44, 44, 47, 47};
static const int16_t g_kBlockCat2CtxOffsetLast[] = {IDX_UNUSED, 0, 15, 29, 44, 47, 44, 44, 47, 47};
static const int16_t g_kBlockCat2CtxOffsetOne [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 30, 30, 39, 39};
static const int16_t g_kBlockCat2CtxOffsetAbs [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 30, 30, 39, 39};
static const int16_t g_kMaxPos [] = {IDX_UNUSED, 15, 14, 15, 3, 14, 63, 3, 3, 14, 14};
static const int16_t g_kMaxC2 [] = {IDX_UNUSED, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4};
static const int16_t g_kBlockCat2CtxOffsetCBF[] = {IDX_UNUSED, 0, 4, 8, 12, 16, 0, 12, 12, 16, 16};
static const int16_t g_kBlockCat2CtxOffsetMap [] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47};
static const int16_t g_kBlockCat2CtxOffsetLast[] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47};
static const int16_t g_kBlockCat2CtxOffsetOne [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 0, 30, 30, 39, 39};
static const int16_t g_kBlockCat2CtxOffsetAbs [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 0, 30, 30, 39, 39};
const uint8_t g_kTopBlkInsideMb[24] = { //for index with z-order 0~23
// 0 1 | 4 5 luma 8*8 block pNonZeroCount[16+8]
@ -275,6 +276,24 @@ int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeigh
}
return ERR_NONE;
}
// Decodes one CABAC bin and reports it as the transform_size_8x8 flag of the current MB.
// The context increment (0, 1 or 2) counts how many of the left / top neighbouring
// macroblocks are available and already have their own pTransformSize8x8Flag set.
//   pCtx                  - decoder context; its CABAC engine, context table and current layer are used
//   pNeighAvail           - iLeftAvail / iTopAvail gate the neighbour look-ups
//   bTransformSize8x8Flag - out: true when the decoded bin is non-zero
// Returns ERR_NONE on success; WELS_READ_VERIFY is defined elsewhere and presumably
// returns early with the error code when DecodeBinCabac fails - confirm.
int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail,
                                        bool& bTransformSize8x8Flag) {
  const int32_t kiMbXy = pCtx->pCurDqLayer->iMbXyIndex;
  const int32_t kiTopMbXy = kiMbXy - pCtx->pCurDqLayer->iMbWidth;
  int32_t iCtxInc = 0;
  uint32_t uiBin;

  // The neighbour flag is only read when that neighbour is available, so the
  // look-up never leaves the picture at its left / top border.
  if (pNeighAvail->iLeftAvail && pCtx->pCurDqLayer->pTransformSize8x8Flag[kiMbXy - 1]) {
    ++iCtxInc;
  }
  if (pNeighAvail->iTopAvail && pCtx->pCurDqLayer->pTransformSize8x8Flag[kiTopMbXy]) {
    ++iCtxInc;
  }

  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_TS_8x8_FLAG + iCtxInc,
                                    uiBin));
  bTransformSize8x8Flag = (uiBin != 0);

  return ERR_NONE;
}
int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType) {
uint32_t uiCode;
PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
@ -471,6 +490,9 @@ int32_t ParseInterMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNe
pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterSubMbTypeInfo[uiSubMbType].iType;
pSubPartCount[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartCount;
pPartW[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartWidth;
// Need modification when B picture add in, reference to 7.3.5
pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0);
}
for (i = 0; i < 4; i++) {
@ -721,7 +743,7 @@ int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int3
int32_t iCurrBlkXy = pCtx->pCurDqLayer->iMbXyIndex;
int32_t iTopBlkXy = iCurrBlkXy - pCtx->pCurDqLayer->iMbWidth; //default value: MB neighboring
int32_t iLeftBlkXy = iCurrBlkXy - 1; //default value: MB neighboring
uint8_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc;
uint16_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc;
int16_t* pMbType = pCtx->pCurDqLayer->pMbType;
int32_t iCtxInc;
uiCbfBit = 0;
@ -760,22 +782,30 @@ int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int3
int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty, PWelsDecoderContext pCtx,
uint32_t& uiCoeffNum) {
uint32_t uiCode;
PWelsCabacCtx pMapCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MAP + g_kBlockCat2CtxOffsetMap [iResProperty];
PWelsCabacCtx pLastCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_LAST + g_kBlockCat2CtxOffsetLast[iResProperty];
PWelsCabacCtx pMapCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_MAP_8x8 : NEW_CTX_OFFSET_MAP)
+ g_kBlockCat2CtxOffsetMap [iResProperty];
PWelsCabacCtx pLastCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_LAST_8x8 :
NEW_CTX_OFFSET_LAST) + g_kBlockCat2CtxOffsetLast[iResProperty];
int32_t i;
uiCoeffNum = 0;
int32_t i0 = 0;
int32_t i1 = g_kMaxPos[iResProperty];
int32_t iCtx;
for (i = i0; i < i1; ++i) {
iCtx = (iResProperty == LUMA_DC_AC_8 ? g_kuiIdx2CtxSignificantCoeffFlag8x8[i] : i);
//read significant
WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMapCtx + i, uiCode));
WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMapCtx + iCtx, uiCode));
if (uiCode) {
* (pSignificantMap++) = 1;
++ uiCoeffNum;
//read last significant
WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pLastCtx + i, uiCode));
iCtx = (iResProperty == LUMA_DC_AC_8 ? g_kuiIdx2CtxLastSignificantCoeffFlag8x8[i] : i);
WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pLastCtx + iCtx, uiCode));
if (uiCode) {
memset (pSignificantMap, 0, (i1 - i) * sizeof (int32_t));
return ERR_NONE;
@ -796,8 +826,11 @@ int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty
int32_t ParseSignificantCoeffCabac (int32_t* pSignificant, int32_t iResProperty, PWelsDecoderContext pCtx) {
uint32_t uiCode;
PWelsCabacCtx pOneCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_ONE + g_kBlockCat2CtxOffsetOne[iResProperty];
PWelsCabacCtx pAbsCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_ABS + g_kBlockCat2CtxOffsetAbs[iResProperty];
PWelsCabacCtx pOneCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_ONE_8x8 : NEW_CTX_OFFSET_ONE) +
g_kBlockCat2CtxOffsetOne[iResProperty];
PWelsCabacCtx pAbsCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_ABS_8x8 : NEW_CTX_OFFSET_ABS) +
g_kBlockCat2CtxOffsetAbs[iResProperty];
const int16_t iMaxType = g_kMaxC2[iResProperty];
int32_t i = g_kMaxPos[iResProperty];
int32_t* pCoff = pSignificant + i;
@ -826,6 +859,46 @@ int32_t ParseSignificantCoeffCabac (int32_t* pSignificant, int32_t iResProperty,
return ERR_NONE;
}
// Parses the CABAC-coded residual of one 8x8 luma block and writes the dequantised
// coefficients into sTCoeff.
//   pNeighAvail        - not used in this function
//   pNonZeroCountCache - the four entries g_kCacheNzcScanIdx[iIndex .. iIndex + 3] receive the
//                        number of significant coefficients (truncated to 8 bits)
//   pBsAux             - not used in this function
//   iIndex             - index of the first of the four 4x4 blocks covered by this 8x8 block
//   iMaxNumCoeff       - not used in this function
//   pScanTable         - maps scan position j (0..63) to the coefficient position in sTCoeff
//   iResProperty       - residual category; may be rewritten by GetMbResProperty
//   sTCoeff            - out: dequantised coefficients; only significant positions are written
//   uiQp               - quantiser used to select the dequant row and the shift
//   pCtx               - decoder context (scaling lists, CABAC state)
// Returns ERR_NONE on success. WELS_READ_VERIFY is defined elsewhere and presumably
// returns early with the error code of the failing parse call - confirm.
int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
                                    int32_t iIndex, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty,
                                    short* sTCoeff, /*int mb_mode*/ uint8_t uiQp, PWelsDecoderContext pCtx) {
  uint32_t uiTotalCoeffNum = 0;
  int32_t pSignificantMap[64] = {0};

  int32_t iMbResProperty = 0;
  // NOTE(review): GetMbResProperty is defined elsewhere; it receives both properties by
  // pointer, so iResProperty may differ afterwards - confirm the exact mapping.
  GetMbResProperty (&iMbResProperty, &iResProperty, false);
  const uint16_t* pDeQuantMul = (pCtx->bUseScalingList) ? pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] :
                                g_kuiDequantCoeff8x8[uiQp];

  // for 8x8, MaxNumCoeff == 64 and the coded-block bit is taken as 1, so the
  // significance map and the levels are always parsed.
  WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum));
  WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx));

  pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex]] =
    pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 1]] =
      pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 2]] =
        pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 3]] = (uint8_t)uiTotalCoeffNum;
  if (uiTotalCoeffNum == 0) {
    return ERR_NONE;
  }

  if (iResProperty == LUMA_DC_AC_8) {
    const int32_t kiQpDiv6 = uiQp / 6;
    for (int32_t j = 0; j < 64; ++j) {
      if (pSignificantMap[j] == 0) {
        continue;
      }
      const int32_t i = pScanTable[j];
      const int32_t kiScaled = pSignificantMap[j] * pDeQuantMul[i];
      if (uiQp >= 36) {
        // Levels can be negative; left-shifting a negative value is undefined before
        // C++20, so scale by multiplication, which yields the same value.
        sTCoeff[i] = kiScaled * (1 << (kiQpDiv6 - 6));
      } else {
        // Rounded right shift for the lower quantiser range.
        sTCoeff[i] = (kiScaled + (1 << (5 - kiQpDiv6))) >> (6 - kiQpDiv6);
      }
    }
  }

  return ERR_NONE;
}
int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
int32_t iIndex, int32_t iMaxNumCoeff,
const uint8_t* pScanTable, int32_t iResProperty, short* sTCoeff, /*int mb_mode*/ uint8_t uiQp,

View File

@ -151,7 +151,7 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo
pNonZeroCount[5 + 8 * 5] = -1;//unavailable
}
}
void WelsFillCacheConstrain1Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer) { //no matter slice type
int32_t iCurXy = pCurLayer->iMbXyIndex;
int32_t iTopXy = 0;
@ -197,7 +197,7 @@ void WelsFillCacheConstrain1Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
}
void WelsFillCacheConstrain0Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer) { //no matter slice type
int32_t iCurXy = pCurLayer->iMbXyIndex;
int32_t iTopXy = 0;
@ -214,7 +214,7 @@ void WelsFillCacheConstrain0Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
//intra4x4_pred_mode
if (pNeighAvail->iTopAvail && IS_INTRA4x4 (pNeighAvail->iTopType)) { //top
if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top
ST32 (pIntraPredMode + 1, LD32 (&pCurLayer->pIntraPredMode[iTopXy][0]));
} else {
int32_t iPred;
@ -225,7 +225,7 @@ void WelsFillCacheConstrain0Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNon
ST32 (pIntraPredMode + 1, iPred);
}
if (pNeighAvail->iLeftAvail && IS_INTRA4x4 (pNeighAvail->iLeftType)) { //left
if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left
pIntraPredMode[ 0 + 8 * 1] = pCurLayer->pIntraPredMode[iLeftXy][4];
pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5];
pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6];
@ -565,12 +565,13 @@ int32_t CheckIntraChromaPredMode (uint8_t uiSampleAvail, int8_t* pMode) {
return 0;
}
int32_t CheckIntra4x4PredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex) {
int32_t CheckIntraNxNPredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex, bool b8x8) {
int8_t iIdx = g_kuiCache30ScanIdx[iIndex];
int32_t iLeftAvail = pSampleAvail[iIdx - 1];
int32_t iTopAvail = pSampleAvail[iIdx - 6];
int32_t bLeftTopAvail = pSampleAvail[iIdx - 7];
int32_t bRightTopAvail = pSampleAvail[iIdx - 5];
int32_t bRightTopAvail = pSampleAvail[iIdx - (b8x8 ? 4 : 5)]; // Diff with 4x4 Pred
int8_t iFinalMode;
@ -900,6 +901,93 @@ int32_t WelsResidualBlockCavlc (SVlcTable* pVlcTable, uint8_t* pNonZeroCountCach
return 0;
}
/*
 * Parses one CAVLC residual block (at most 16 coefficients) that is part of an 8x8 transform
 * block, dequantizes the levels and stores them into the 8x8 coefficient buffer.
 *
 * pVlcTable           VLC tables handed through to the CavlcGet* helpers.
 * pNonZeroCountCache  neighbour cache of coefficient counts; the left (idx - 1) and top (idx - 8)
 *                     entries are read to derive nC, and the entry for this block is overwritten
 *                     with the parsed total-coefficient count (except for CHROMA_DC / I16_LUMA_DC).
 * pBs                 bit-stream state; pBs->iIndex is advanced by the number of consumed bits on
 *                     success. It is left untouched on the error returns.
 * iIndex              block index, mapped through g_kuiCache48CountScan4Idx into the cache.
 * iMaxNumCoeff        maximum number of coefficients of this block; bounds total-zeros parsing.
 * kpZigzagTable       scan table translating the interleaved scan position into a pTCoeff index.
 * iResidualProperty   residual type; passed by address to GetMbResProperty, which may rewrite it
 *                     -- NOTE(review): exact mapping is defined by that helper, not visible here.
 * pTCoeff             destination coefficient buffer, indexed by the zig-zag table output.
 * iIdx4x4             offset added to (position << 2), i.e. the coefficients of this block occupy
 *                     every fourth scan position -- presumably 0..3, confirm against the caller.
 * uiQp                quantization parameter selecting the dequant row and the scaling shift.
 * pCtx                decoder context providing bUseScalingList and pDequant_coeff8x8.
 *
 * Returns 0 on success (including the "no coefficients" case) or one of the
 * ERR_INFO_CAVLC_* codes when a parsed syntax element is out of range.
 */
int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable, uint8_t* pNonZeroCountCache, PBitStringAux pBs, int32_t iIndex,
                                   int32_t iMaxNumCoeff, const uint8_t* kpZigzagTable, int32_t iResidualProperty,
                                   int16_t* pTCoeff, int32_t iIdx4x4, uint8_t uiQp,
                                   PWelsDecoderContext pCtx) {
  int32_t iLevel[16], iZerosLeft, iCoeffNum;
  int32_t iRun[16];
  int32_t iCurNonZeroCacheIdx, i;
  int32_t iMbResProperty = 0;
  GetMbResProperty (&iMbResProperty, &iResidualProperty, 1);

  // Use the stream-specific scaling-list table when enabled, otherwise the default 8x8 table.
  const uint16_t* kpDequantCoeff = pCtx->bUseScalingList ? pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] :
                                   g_kuiDequantCoeff8x8[uiQp];

  int8_t nA, nB, nC;
  uint8_t uiTotalCoeff, uiTrailingOnes;
  int32_t iUsedBits = 0;
  intX_t iCurIdx = pBs->iIndex;
  uint8_t* pBuf = ((uint8_t*)pBs->pStartBuf) + (iCurIdx >> 3);
  bool bChromaDc = (CHROMA_DC == iResidualProperty);
  SReadBitsCache sReadBitsCache;

  // Assemble the big-endian 32-bit cache in unsigned arithmetic: shifting the promoted (signed)
  // byte values could overflow into the sign bit when pBuf[0] >= 0x80.
  uint32_t uiCache32Bit = ((uint32_t) pBuf[0] << 24) | ((uint32_t) pBuf[1] << 16) |
                          ((uint32_t) pBuf[2] << 8) | (uint32_t) pBuf[3];
  sReadBitsCache.uiCache32Bit = uiCache32Bit << (iCurIdx & 0x07);
  sReadBitsCache.uiRemainBits = 32 - (iCurIdx & 0x07);
  sReadBitsCache.pBuf = pBuf;
  //////////////////////////////////////////////////////////////////////////

  // Chroma and luma use the same cache layout here: left neighbour at -1, top neighbour at -8.
  iCurNonZeroCacheIdx = g_kuiCache48CountScan4Idx[iIndex];
  nA = pNonZeroCountCache[iCurNonZeroCacheIdx - 1];
  nB = pNonZeroCountCache[iCurNonZeroCacheIdx - 8];

  WELS_NON_ZERO_COUNT_AVERAGE (nC, nA, nB);

  iUsedBits += CavlcGetTrailingOnesAndTotalCoeff (uiTotalCoeff, uiTrailingOnes, &sReadBitsCache, pVlcTable, bChromaDc,
               nC);

  if (iResidualProperty != CHROMA_DC && iResidualProperty != I16_LUMA_DC) {
    pNonZeroCountCache[iCurNonZeroCacheIdx] = uiTotalCoeff;
    //////////////////////////////////////////////////////////////////////////
  }
  if (0 == uiTotalCoeff) {
    // Nothing else is coded for an empty block; just consume the coeff_token bits.
    pBs->iIndex += iUsedBits;
    return 0;
  }
  if ((uiTrailingOnes > 3) || (uiTotalCoeff > 16)) { /////////////////check uiTrailingOnes and uiTotalCoeff
    return ERR_INFO_CAVLC_INVALID_TOTAL_COEFF_OR_TRAILING_ONES;
  }
  if ((i = CavlcGetLevelVal (iLevel, &sReadBitsCache, uiTotalCoeff, uiTrailingOnes)) == -1) {
    return ERR_INFO_CAVLC_INVALID_LEVEL;
  }
  iUsedBits += i;
  if (uiTotalCoeff < iMaxNumCoeff) {
    iUsedBits += CavlcGetTotalZeros (iZerosLeft, &sReadBitsCache, uiTotalCoeff, pVlcTable, bChromaDc);
  } else {
    // A full block leaves no room for zeros, so total_zeros is not coded.
    iZerosLeft = 0;
  }

  if ((iZerosLeft < 0) || ((iZerosLeft + uiTotalCoeff) > iMaxNumCoeff)) {
    return ERR_INFO_CAVLC_INVALID_ZERO_LEFT;
  }
  if ((i = CavlcGetRunBefore (iRun, &sReadBitsCache, uiTotalCoeff, pVlcTable, iZerosLeft)) == -1) {
    return ERR_INFO_CAVLC_INVALID_RUN_BEFORE;
  }
  iUsedBits += i;
  pBs->iIndex += iUsedBits;

  // Walk the parsed levels from the last array entry to the first, accumulating run + 1 to get
  // the scan position, then spread the block over every fourth position of the 8x8 scan.
  iCoeffNum = -1;
  for (i = uiTotalCoeff - 1; i >= 0; --i) { //FIXME merge into rundecode?
    int32_t j;
    iCoeffNum += iRun[i] + 1; //FIXME add 1 earlier ?
    j = (iCoeffNum << 2) + iIdx4x4;
    j = kpZigzagTable[ j ];
    // For uiQp >= 36 the level is scaled up by 2^(uiQp/6 - 6). Multiply rather than left-shift:
    // the product may be negative and left-shifting a negative signed value is undefined.
    // Below 36 the value is rounded and scaled down by 2^(6 - uiQp/6).
    pTCoeff[j] = uiQp >= 36 ? ((iLevel[i] * kpDequantCoeff[j]) * (1 << (uiQp / 6 - 6)))
                 : ((iLevel[i] * kpDequantCoeff[j] + (1 << (5 - uiQp / 6))) >> (6 - uiQp / 6));
  }

  return 0;
}
int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
PBitStringAux pBs) {
PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
@ -941,7 +1029,8 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
return ERR_INFO_INVALID_REF_INDEX;
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[iRefIdx]&&ppRefPic[iRefIdx]->bIsComplete);
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx]
&& ppRefPic[iRefIdx]->bIsComplete);
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@ -981,7 +1070,8 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
return ERR_INFO_INVALID_REF_INDEX;
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[iRefIdx[i]]&&ppRefPic[iRefIdx[i]]->bIsComplete);
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
&& ppRefPic[iRefIdx[i]]->bIsComplete);
}
for (i = 0; i < 2; i++) {
PredInter16x8Mv (iMvArray, iRefIdxArray, i << 3, iRefIdx[i], iMv);
@ -1017,7 +1107,8 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
return ERR_INFO_INVALID_REF_INDEX;
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[iRefIdx[i]]&&ppRefPic[iRefIdx[i]]->bIsComplete);
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
&& ppRefPic[iRefIdx[i]]->bIsComplete);
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@ -1056,6 +1147,9 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterSubMbTypeInfo[uiSubMbType].iType;
iSubPartCount[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartCount;
iPartWidth[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartWidth;
// Needs to be modified once B pictures are supported; see section 7.3.5.
pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0);
}
if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
@ -1085,7 +1179,8 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
return ERR_INFO_INVALID_REF_INDEX;
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[iRefIdx[i]]&&ppRefPic[iRefIdx[i]]->bIsComplete);
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
&& ppRefPic[iRefIdx[i]]->bIsComplete);
pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 1] =
pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 5] = iRefIdx[i];

View File

@ -61,12 +61,66 @@ void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer p
}
}
/*
 * Reconstructs one Intra 8x8 macroblock: luma through RecI8x8Luma, then chroma through
 * RecI4x4Chroma (the same chroma routine RecI4x4Mb uses).
 *
 * iMbXy         index of the macroblock inside the layer.
 * pCtx          decoder context, forwarded unchanged.
 * pScoeffLevel  coefficient buffer of the macroblock, forwarded unchanged.
 * pDqLayer      current layer, forwarded unchanged.
 *
 * The results of both helpers are discarded; the function always returns ERR_NONE.
 */
int32_t RecI8x8Mb (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
  // Luma plane first, predicted and transformed on 8x8 blocks.
  (void) RecI8x8Luma (iMbXy, pCtx, pScoeffLevel, pDqLayer);

  // Chroma planes afterwards.
  (void) RecI4x4Chroma (iMbXy, pCtx, pScoeffLevel, pDqLayer);

  return ERR_NONE;
}
int32_t RecI8x8Luma (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
/*****get local variable from outer variable********/
/*prediction info*/
uint8_t* pPred = pDqLayer->pPred[0];
int32_t iLumaStride = pDqLayer->iLumaStride;
int32_t* pBlockOffset = pCtx->iDecBlockOffsetArray;
PGetIntraPred8x8Func* pGetI8x8LumaPredFunc = pCtx->pGetI8x8LumaPredFunc;
int8_t* pIntra8x8PredMode = pDqLayer->pIntra4x4FinalMode[iMbXy]; // I_NxN
int16_t* pRS = pScoeffLevel;
/*itransform info*/
PIdctResAddPredFunc pIdctResAddPredFunc = pCtx->pIdctResAddPredFunc8x8;
/*************local variable********************/
uint8_t i = 0;
bool bTLAvail[4], bTRAvail[4];
// Top-Right : Left : Top-Left : Top
bTLAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x02);
bTLAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01);
bTLAvail[2] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x04);
bTLAvail[3] = true;
bTRAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01);
bTRAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x08);
bTRAvail[2] = true;
bTRAvail[3] = false;
/*************real process*********************/
for (i = 0; i < 4; i++) {
uint8_t* pPredI8x8 = pPred + pBlockOffset[i << 2];
uint8_t uiMode = pIntra8x8PredMode[g_kuiScan4[i << 2]];
pGetI8x8LumaPredFunc[uiMode] (pPredI8x8, iLumaStride, bTLAvail[i], bTRAvail[i]);
int32_t iIndex = g_kuiMbCountScan4Idx[i << 2];
if (pDqLayer->pNzc[iMbXy][iIndex] || pDqLayer->pNzc[iMbXy][iIndex + 1] || pDqLayer->pNzc[iMbXy][iIndex + 4]
|| pDqLayer->pNzc[iMbXy][iIndex + 5]) {
int16_t* pRSI8x8 = &pRS[i << 6];
pIdctResAddPredFunc (pPredI8x8, iLumaStride, pRSI8x8);
}
}
return ERR_NONE;
}
/*
 * Reconstructs one Intra 4x4 macroblock: luma through RecI4x4Luma, then chroma through
 * RecI4x4Chroma.
 *
 * iMBXY         index of the macroblock inside the layer.
 * pCtx          decoder context, forwarded unchanged.
 * pScoeffLevel  coefficient buffer of the macroblock, forwarded unchanged.
 * pDqLayer      current layer, forwarded unchanged.
 *
 * The results of both helpers are discarded; the function always returns ERR_NONE.
 */
int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
  // Luma plane first.
  (void) RecI4x4Luma (iMBXY, pCtx, pScoeffLevel, pDqLayer);

  // Chroma planes afterwards.
  (void) RecI4x4Chroma (iMBXY, pCtx, pScoeffLevel, pDqLayer);

  return ERR_NONE;
}
int32_t RecI4x4Luma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
/*****get local variable from outer variable********/
/*prediction info*/

View File

@ -721,6 +721,10 @@ TEST (DecoderDeblocking, FilteringEdgeLumaHV) {
sDqLayer.iMbY = 0; //Only for test easy
sDqLayer.iMbXyIndex = 1; // this function has NO iMbXyIndex validation
bool bTSize8x8Flag[50] = {false};
sDqLayer.pTransformSize8x8Flag = bTSize8x8Flag;
sDqLayer.pTransformSize8x8Flag[sDqLayer.iMbXyIndex] = false;
#define UT_DB_LUMA_TEST(iFlag, iQP, iV0, iV1, iV2) \
iBoundryFlag = iFlag; \
memset(iLumaQP, iQP, sizeof(int8_t)*50); \
@ -777,6 +781,10 @@ TEST (DecoderDeblocking, DeblockingBsMarginalMBAvcbase) {
sDqLayer.pMv[0] = (int16_t (*) [16][2])&iLayerMv[0];
sDqLayer.pMv[1] = (int16_t (*) [16][2])&iLayerMv[1];
bool bTSize8x8Flag[50] = {false};
sDqLayer.pTransformSize8x8Flag = bTSize8x8Flag;
memset (bTSize8x8Flag, 0, sizeof (bool) * 50);
#define UT_DB_CLEAN_STATUS \
memset(iNoZeroCount, 0, sizeof(int8_t)*24*2); \
memset(iLayerRefIndex, 0, sizeof(int8_t)*2*16*2); \
@ -883,6 +891,10 @@ TEST (Deblocking, WelsDeblockingMb) {
sDqLayer.iMbXyIndex = 1;
sDqLayer.iMbWidth = 1;
bool bTSize8x8Flag[50] = {false};
sDqLayer.pTransformSize8x8Flag = bTSize8x8Flag;
memset (bTSize8x8Flag, 0, sizeof (bool) * 50);
uint8_t iY[50] = {0};
sFilter.pCsData[0] = iY;
sFilter.iCsStride[0] = 4;
@ -922,19 +934,19 @@ TEST (Deblocking, WelsDeblockingMb) {
EXPECT_TRUE(iCb[2<<1]==iChromaV1 && iCr[2<<1]==iChromaV1)<<iQP<<" "<<sDqLayer.pMbType[1]; \
EXPECT_TRUE(iCb[(2<<1)*sFilter.iCsStride[1]]==iChromaV2 && iCr[(2<<1)*sFilter.iCsStride[1]]==iChromaV2)<<iQP<<" "<<sDqLayer.pMbType[1];
// QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA4x4
// QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA4x4
iQP = 16 + rand() % 35;
sDqLayer.pMbType[1] = MB_TYPE_INTRA4x4;
UT_DB_MACROBLOCK_TEST (0x03, iQP, 2, 1, 1, 2, 1, 1)
// QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA16x16
// QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA16x16
iQP = 16 + rand() % 35;
sDqLayer.pMbType[1] = MB_TYPE_INTRA16x16;
UT_DB_MACROBLOCK_TEST (0x03, iQP, 2, 1, 1, 2, 1, 1)
// MbType==0x03, Intra8x8 has not been supported now.
// QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA_PCM
// QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA_PCM
iQP = 16 + rand() % 35;
sDqLayer.pMbType[1] = MB_TYPE_INTRA_PCM;
UT_DB_MACROBLOCK_TEST (0x03, iQP, 2, 1, 1, 2, 1, 1)