4:4:4 H.264 decoding support
Note: this is 4:4:4 from the 2007 spec revision, not the previous (now deprecated) 4:4:4 mode in H.264.
This commit is contained in:
@@ -784,7 +784,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
|
||||
|
||||
/* draw the edges of width 'w' of an image of size width, height
|
||||
this mmx version can only handle w==8 || w==16 */
|
||||
static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int sides)
|
||||
static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
|
||||
{
|
||||
uint8_t *ptr, *last_line;
|
||||
int i;
|
||||
@@ -839,7 +839,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w,
|
||||
|
||||
/* top and bottom (and hopefully also the corners) */
|
||||
if (sides&EDGE_TOP) {
|
||||
for(i = 0; i < w; i += 4) {
|
||||
for(i = 0; i < h; i += 4) {
|
||||
ptr= buf - (i + 1) * wrap - w;
|
||||
__asm__ volatile(
|
||||
"1: \n\t"
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
|
||||
static int decode_significance_x86(CABACContext *c, int max_coeff,
|
||||
uint8_t *significant_coeff_ctx_base,
|
||||
int *index){
|
||||
int *index, x86_reg last_off){
|
||||
void *end= significant_coeff_ctx_base + max_coeff - 1;
|
||||
int minusstart= -(int)significant_coeff_ctx_base;
|
||||
int minusindex= 4-(int)index;
|
||||
@@ -52,10 +52,12 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
|
||||
|
||||
"test $1, %%edx \n\t"
|
||||
" jz 3f \n\t"
|
||||
"add %7, %1 \n\t"
|
||||
|
||||
BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx",
|
||||
BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx",
|
||||
"%%bx", "%%esi", "%%eax", "%%al")
|
||||
|
||||
"sub %7, %1 \n\t"
|
||||
"mov %2, %%"REG_a" \n\t"
|
||||
"movl %4, %%ecx \n\t"
|
||||
"add %1, %%"REG_c" \n\t"
|
||||
@@ -82,7 +84,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
|
||||
"movl %%esi, "RANGE "(%3) \n\t"
|
||||
"movl %%ebx, "LOW "(%3) \n\t"
|
||||
:"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)
|
||||
:"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)
|
||||
:"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off)
|
||||
: "%"REG_c, "%ebx", "%edx", "%esi", "memory"
|
||||
);
|
||||
return coeff_count;
|
||||
@@ -90,7 +92,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
|
||||
|
||||
static int decode_significance_8x8_x86(CABACContext *c,
|
||||
uint8_t *significant_coeff_ctx_base,
|
||||
int *index, const uint8_t *sig_off){
|
||||
int *index, x86_reg last_off, const uint8_t *sig_off){
|
||||
int minusindex= 4-(int)index;
|
||||
int coeff_count;
|
||||
x86_reg last=0;
|
||||
@@ -114,8 +116,9 @@ static int decode_significance_8x8_x86(CABACContext *c,
|
||||
|
||||
"movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
|
||||
"add %5, %%"REG_D" \n\t"
|
||||
"add %7, %%"REG_D" \n\t"
|
||||
|
||||
BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx",
|
||||
BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx",
|
||||
"%%bx", "%%esi", "%%eax", "%%al")
|
||||
|
||||
"mov %2, %%"REG_a" \n\t"
|
||||
@@ -142,7 +145,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
|
||||
"movl %%esi, "RANGE "(%3) \n\t"
|
||||
"movl %%ebx, "LOW "(%3) \n\t"
|
||||
:"=&a"(coeff_count),"+m"(last), "+m"(index)
|
||||
:"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)
|
||||
:"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off)
|
||||
: "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"
|
||||
);
|
||||
return coeff_count;
|
||||
|
||||
@@ -32,14 +32,18 @@
|
||||
SECTION_RODATA
|
||||
|
||||
; FIXME this table is a duplicate from h264data.h, and will be removed once the tables from h264 have been split
|
||||
scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
|
||||
db 6+1*8, 7+1*8, 6+2*8, 7+2*8
|
||||
db 4+3*8, 5+3*8, 4+4*8, 5+4*8
|
||||
db 6+3*8, 7+3*8, 6+4*8, 7+4*8
|
||||
db 1+1*8, 2+1*8
|
||||
db 1+2*8, 2+2*8
|
||||
db 1+4*8, 2+4*8
|
||||
db 1+5*8, 2+5*8
|
||||
scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
|
||||
db 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
|
||||
db 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
|
||||
db 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
|
||||
db 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
|
||||
db 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
|
||||
db 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
|
||||
db 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
|
||||
db 4+11*8, 5+11*8, 4+12*8, 5+12*8
|
||||
db 6+11*8, 7+11*8, 6+12*8, 7+12*8
|
||||
db 4+13*8, 5+13*8, 4+14*8, 5+14*8
|
||||
db 6+13*8, 7+13*8, 6+14*8, 7+14*8
|
||||
%ifdef PIC
|
||||
%define scan8 r11
|
||||
%else
|
||||
@@ -617,6 +621,8 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0
|
||||
mov r10, r0
|
||||
%endif
|
||||
call h264_idct_add8_mmx_plane
|
||||
mov r5, 32
|
||||
add r2, 384
|
||||
%ifdef ARCH_X86_64
|
||||
add r10, gprsize
|
||||
%else
|
||||
@@ -678,6 +684,8 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0
|
||||
lea r11, [scan8_mem]
|
||||
%endif
|
||||
call h264_idct_add8_mmx2_plane
|
||||
mov r5, 32
|
||||
add r2, 384
|
||||
%ifdef ARCH_X86_64
|
||||
add r10, gprsize
|
||||
%else
|
||||
@@ -810,12 +818,12 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
|
||||
test r0, r0
|
||||
jz .try%1dc
|
||||
%ifdef ARCH_X86_64
|
||||
mov r0d, dword [r1+%1*8+64]
|
||||
mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
|
||||
add r0, [r10]
|
||||
%else
|
||||
mov r0, r0m
|
||||
mov r0, [r0]
|
||||
add r0, dword [r1+%1*8+64]
|
||||
add r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
|
||||
%endif
|
||||
call x264_add8x4_idct_sse2
|
||||
jmp .cycle%1end
|
||||
@@ -824,16 +832,18 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
|
||||
or r0w, word [r2+32]
|
||||
jz .cycle%1end
|
||||
%ifdef ARCH_X86_64
|
||||
mov r0d, dword [r1+%1*8+64]
|
||||
mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
|
||||
add r0, [r10]
|
||||
%else
|
||||
mov r0, r0m
|
||||
mov r0, [r0]
|
||||
add r0, dword [r1+%1*8+64]
|
||||
add r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
|
||||
%endif
|
||||
call h264_idct_dc_add8_mmx2
|
||||
.cycle%1end
|
||||
%if %1 < 3
|
||||
%if %1 == 1
|
||||
add r2, 384+64
|
||||
%elif %1 < 3
|
||||
add r2, 64
|
||||
%endif
|
||||
%endmacro
|
||||
@@ -845,15 +855,15 @@ cglobal h264_idct_add8_8_sse2, 5, 7, 8
|
||||
%ifdef ARCH_X86_64
|
||||
mov r10, r0
|
||||
%endif
|
||||
add8_sse2_cycle 0, 0x09
|
||||
add8_sse2_cycle 1, 0x11
|
||||
add8_sse2_cycle 0, 0x34
|
||||
add8_sse2_cycle 1, 0x3c
|
||||
%ifdef ARCH_X86_64
|
||||
add r10, gprsize
|
||||
%else
|
||||
add r0mp, gprsize
|
||||
%endif
|
||||
add8_sse2_cycle 2, 0x21
|
||||
add8_sse2_cycle 3, 0x29
|
||||
add8_sse2_cycle 2, 0x5c
|
||||
add8_sse2_cycle 3, 0x64
|
||||
RET
|
||||
|
||||
;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
|
||||
|
||||
@@ -29,14 +29,18 @@ SECTION_RODATA
|
||||
|
||||
pw_pixel_max: times 8 dw ((1 << 10)-1)
|
||||
pd_32: times 4 dd 32
|
||||
scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
|
||||
db 6+1*8, 7+1*8, 6+2*8, 7+2*8
|
||||
db 4+3*8, 5+3*8, 4+4*8, 5+4*8
|
||||
db 6+3*8, 7+3*8, 6+4*8, 7+4*8
|
||||
db 1+1*8, 2+1*8
|
||||
db 1+2*8, 2+2*8
|
||||
db 1+4*8, 2+4*8
|
||||
db 1+5*8, 2+5*8
|
||||
scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
|
||||
db 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
|
||||
db 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
|
||||
db 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
|
||||
db 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
|
||||
db 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
|
||||
db 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
|
||||
db 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
|
||||
db 4+11*8, 5+11*8, 4+12*8, 5+12*8
|
||||
db 6+11*8, 7+11*8, 6+12*8, 7+12*8
|
||||
db 4+13*8, 5+13*8, 4+14*8, 5+14*8
|
||||
db 6+13*8, 7+13*8, 6+14*8, 7+14*8
|
||||
|
||||
%ifdef PIC
|
||||
%define scan8 r11
|
||||
@@ -306,7 +310,7 @@ INIT_AVX
|
||||
IDCT_ADD16INTRA_10 avx
|
||||
%endif
|
||||
|
||||
%assign last_block 24
|
||||
%assign last_block 36
|
||||
;-----------------------------------------------------------------------------
|
||||
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
|
||||
;-----------------------------------------------------------------------------
|
||||
@@ -317,21 +321,22 @@ cglobal h264_idct_add8_10_%1,5,7
|
||||
%endif
|
||||
add r2, 1024
|
||||
mov r0, [r0]
|
||||
ADD16_OP_INTRA %1, 16, 1+1*8
|
||||
ADD16_OP_INTRA %1, 18, 1+2*8
|
||||
ADD16_OP_INTRA %1, 16, 4+ 6*8
|
||||
ADD16_OP_INTRA %1, 18, 4+ 7*8
|
||||
add r2, 1024-128*2
|
||||
%ifdef ARCH_X86_64
|
||||
mov r0, [r10+gprsize]
|
||||
%else
|
||||
mov r0, r0m
|
||||
mov r0, [r0+gprsize]
|
||||
%endif
|
||||
ADD16_OP_INTRA %1, 20, 1+4*8
|
||||
ADD16_OP_INTRA %1, 22, 1+5*8
|
||||
ADD16_OP_INTRA %1, 32, 4+11*8
|
||||
ADD16_OP_INTRA %1, 34, 4+12*8
|
||||
REP_RET
|
||||
AC %1, 16
|
||||
AC %1, 18
|
||||
AC %1, 20
|
||||
AC %1, 22
|
||||
AC %1, 32
|
||||
AC %1, 34
|
||||
|
||||
%endmacro ; IDCT_ADD8
|
||||
|
||||
|
||||
Reference in New Issue
Block a user