x86: cabac: replace explicit memory references with "m" operands
This replaces the explicit offset(reg) memory references with "m" operands for the same locations. As a result, one fewer register operand is needed for these inline asm statements.

Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
		@@ -27,7 +27,7 @@
 | 
			
		||||
#include "config.h"
 | 
			
		||||
 | 
			
		||||
#if HAVE_FAST_CMOV
 | 
			
		||||
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
 | 
			
		||||
#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
 | 
			
		||||
        "mov    "tmp"       , %%ecx     \n\t"\
 | 
			
		||||
        "shl    $17         , "tmp"     \n\t"\
 | 
			
		||||
        "cmp    "low"       , "tmp"     \n\t"\
 | 
			
		||||
@@ -37,7 +37,7 @@
 | 
			
		||||
        "xor    %%ecx       , "ret"     \n\t"\
 | 
			
		||||
        "sub    "tmp"       , "low"     \n\t"
 | 
			
		||||
#else /* HAVE_FAST_CMOV */
 | 
			
		||||
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
 | 
			
		||||
#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
 | 
			
		||||
        "mov    "tmp"       , %%ecx     \n\t"\
 | 
			
		||||
        "shl    $17         , "tmp"     \n\t"\
 | 
			
		||||
        "sub    "low"       , "tmp"     \n\t"\
 | 
			
		||||
@@ -51,14 +51,13 @@
 | 
			
		||||
        "xor    "tmp"       , "ret"     \n\t"
 | 
			
		||||
#endif /* HAVE_FAST_CMOV */
 | 
			
		||||
 | 
			
		||||
#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \
 | 
			
		||||
#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte) \
 | 
			
		||||
        "movzbl "statep"    , "ret"                                     \n\t"\
 | 
			
		||||
        "mov    "range"     , "tmp"                                     \n\t"\
 | 
			
		||||
        "and    $0xC0       , "range"                                   \n\t"\
 | 
			
		||||
        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
 | 
			
		||||
        "sub    "range"     , "tmp"                                     \n\t"\
 | 
			
		||||
        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword,        \
 | 
			
		||||
                                    range, tmp)                              \
 | 
			
		||||
        BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)   \
 | 
			
		||||
        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
 | 
			
		||||
        "shl    %%cl        , "range"                                   \n\t"\
 | 
			
		||||
        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
 | 
			
		||||
@@ -66,8 +65,8 @@
 | 
			
		||||
        "mov    "tmpbyte"   , "statep"                                  \n\t"\
 | 
			
		||||
        "test   "lowword"   , "lowword"                                 \n\t"\
 | 
			
		||||
        " jnz   1f                                                      \n\t"\
 | 
			
		||||
        "mov "byte"("cabac"), %%"REG_c"                                 \n\t"\
 | 
			
		||||
        "add"OPSIZE" $2     , "byte    "("cabac")                       \n\t"\
 | 
			
		||||
        "mov    "byte"      , %%"REG_c"                                 \n\t"\
 | 
			
		||||
        "add"OPSIZE" $2     , "byte"                                    \n\t"\
 | 
			
		||||
        "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
 | 
			
		||||
        "lea    -1("low")   , %%ecx                                     \n\t"\
 | 
			
		||||
        "xor    "low"       , %%ecx                                     \n\t"\
 | 
			
		||||
@@ -82,7 +81,7 @@
 | 
			
		||||
        "add    "tmp"       , "low"                                     \n\t"\
 | 
			
		||||
        "1:                                                             \n\t"
 | 
			
		||||
 | 
			
		||||
#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
 | 
			
		||||
#if HAVE_6REGS && !defined(BROKEN_RELOCATIONS)
 | 
			
		||||
#define get_cabac_inline get_cabac_inline_x86
 | 
			
		||||
static av_always_inline int get_cabac_inline_x86(CABACContext *c,
 | 
			
		||||
                                                 uint8_t *const state)
 | 
			
		||||
@@ -90,24 +89,24 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
 | 
			
		||||
    int bit, tmp;
 | 
			
		||||
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%0", "%5", "(%4)", "%1", "%w1", "%2",
 | 
			
		||||
                             "%3", "%b3", "%a6")
 | 
			
		||||
        :"=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
 | 
			
		||||
        :"r"(state), "r"(c),
 | 
			
		||||
         "i"(offsetof(CABACContext, bytestream))
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%0", "(%5)", "%1", "%w1", "%2",
 | 
			
		||||
                             "%3", "%b3", "%4")
 | 
			
		||||
        :"=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp),
 | 
			
		||||
         "+m"(c->bytestream)
 | 
			
		||||
        :"r"(state)
 | 
			
		||||
        : "%"REG_c, "memory"
 | 
			
		||||
    );
 | 
			
		||||
    return bit & 1;
 | 
			
		||||
}
 | 
			
		||||
#endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
 | 
			
		||||
#endif /* HAVE_6REGS && !defined(BROKEN_RELOCATIONS) */
 | 
			
		||||
 | 
			
		||||
#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
 | 
			
		||||
static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
 | 
			
		||||
{
 | 
			
		||||
    x86_reg tmp;
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "movl %a3(%2), %k1                      \n\t"
 | 
			
		||||
        "movl %a4(%2), %%eax                    \n\t"
 | 
			
		||||
        "movl %4, %k1                           \n\t"
 | 
			
		||||
        "movl %2, %%eax                         \n\t"
 | 
			
		||||
        "shl $17, %k1                           \n\t"
 | 
			
		||||
        "add %%eax, %%eax                       \n\t"
 | 
			
		||||
        "sub %k1, %%eax                         \n\t"
 | 
			
		||||
@@ -118,22 +117,20 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
 | 
			
		||||
        "sub %%edx, %%ecx                       \n\t"
 | 
			
		||||
        "test %%ax, %%ax                        \n\t"
 | 
			
		||||
        " jnz 1f                                \n\t"
 | 
			
		||||
        "mov  %a5(%2), %1                       \n\t"
 | 
			
		||||
        "mov  %3, %1                            \n\t"
 | 
			
		||||
        "subl $0xFFFF, %%eax                    \n\t"
 | 
			
		||||
        "movzwl (%1), %%edx                     \n\t"
 | 
			
		||||
        "bswap %%edx                            \n\t"
 | 
			
		||||
        "shrl $15, %%edx                        \n\t"
 | 
			
		||||
        "add  $2, %1                            \n\t"
 | 
			
		||||
        "addl %%edx, %%eax                      \n\t"
 | 
			
		||||
        "mov  %1, %a5(%2)                       \n\t"
 | 
			
		||||
        "mov  %1, %3                            \n\t"
 | 
			
		||||
        "1:                                     \n\t"
 | 
			
		||||
        "movl %%eax, %a4(%2)                    \n\t"
 | 
			
		||||
        "movl %%eax, %2                         \n\t"
 | 
			
		||||
 | 
			
		||||
        :"+c"(val), "=&r"(tmp)
 | 
			
		||||
        :"r"(c),
 | 
			
		||||
         "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
 | 
			
		||||
         "i"(offsetof(CABACContext, bytestream))
 | 
			
		||||
        : "%eax", "%edx", "memory"
 | 
			
		||||
        :"+c"(val), "=&r"(tmp), "+m"(c->low), "+m"(c->bytestream)
 | 
			
		||||
        :"m"(c->range)
 | 
			
		||||
        : "%eax", "%edx"
 | 
			
		||||
    );
 | 
			
		||||
    return val;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -36,7 +36,7 @@
 | 
			
		||||
 | 
			
		||||
//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
 | 
			
		||||
//as that would make optimization work hard)
 | 
			
		||||
#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
 | 
			
		||||
#if HAVE_6REGS && !defined(BROKEN_RELOCATIONS)
 | 
			
		||||
static int decode_significance_x86(CABACContext *c, int max_coeff,
 | 
			
		||||
                                   uint8_t *significant_coeff_ctx_base,
 | 
			
		||||
                                   int *index, x86_reg last_off){
 | 
			
		||||
@@ -48,15 +48,15 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "2:                                     \n\t"
 | 
			
		||||
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
 | 
			
		||||
                             "%w3", "%5", "%k0", "%b0", "%a11")
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%4", "(%1)", "%3",
 | 
			
		||||
                             "%w3", "%5", "%k0", "%b0", "%6")
 | 
			
		||||
 | 
			
		||||
        "test $1, %4                            \n\t"
 | 
			
		||||
        " jz 3f                                 \n\t"
 | 
			
		||||
        "add  %10, %1                           \n\t"
 | 
			
		||||
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
 | 
			
		||||
                             "%w3", "%5", "%k0", "%b0", "%a11")
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%4", "(%1)", "%3",
 | 
			
		||||
                             "%w3", "%5", "%k0", "%b0", "%6")
 | 
			
		||||
 | 
			
		||||
        "sub  %10, %1                           \n\t"
 | 
			
		||||
        "mov  %2, %0                            \n\t"
 | 
			
		||||
@@ -81,9 +81,9 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 | 
			
		||||
        "add  %9, %k0                           \n\t"
 | 
			
		||||
        "shr $2, %k0                            \n\t"
 | 
			
		||||
        :"=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
 | 
			
		||||
         "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
 | 
			
		||||
        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
 | 
			
		||||
         "i"(offsetof(CABACContext, bytestream))
 | 
			
		||||
         "+&r"(c->low), "=&r"(bit), "+&r"(c->range),
 | 
			
		||||
         "+m"(c->bytestream)
 | 
			
		||||
        :"m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off)
 | 
			
		||||
        : "%"REG_c, "memory"
 | 
			
		||||
    );
 | 
			
		||||
    return coeff_count;
 | 
			
		||||
@@ -105,8 +105,8 @@ static int decode_significance_8x8_x86(CABACContext *c,
 | 
			
		||||
        "movzbl (%0, %6), %k6                   \n\t"
 | 
			
		||||
        "add %9, %6                             \n\t"
 | 
			
		||||
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
 | 
			
		||||
                             "%w3", "%5", "%k0", "%b0", "%a12")
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%4", "(%6)", "%3",
 | 
			
		||||
                             "%w3", "%5", "%k0", "%b0", "%7")
 | 
			
		||||
 | 
			
		||||
        "mov %1, %k6                            \n\t"
 | 
			
		||||
        "test $1, %4                            \n\t"
 | 
			
		||||
@@ -115,8 +115,8 @@ static int decode_significance_8x8_x86(CABACContext *c,
 | 
			
		||||
        "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
 | 
			
		||||
        "add %11, %6                            \n\t"
 | 
			
		||||
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
 | 
			
		||||
                             "%w3", "%5", "%k0", "%b0", "%a12")
 | 
			
		||||
        BRANCHLESS_GET_CABAC("%4", "(%6)", "%3",
 | 
			
		||||
                             "%w3", "%5", "%k0", "%b0", "%7")
 | 
			
		||||
 | 
			
		||||
        "mov %2, %0                             \n\t"
 | 
			
		||||
        "mov %1, %k6                            \n\t"
 | 
			
		||||
@@ -138,13 +138,12 @@ static int decode_significance_8x8_x86(CABACContext *c,
 | 
			
		||||
        "addl %8, %k0                           \n\t"
 | 
			
		||||
        "shr $2, %k0                            \n\t"
 | 
			
		||||
        :"=&q"(coeff_count),"+m"(last), "+m"(index), "+&r"(c->low), "=&r"(bit),
 | 
			
		||||
         "+&r"(c->range), "=&r"(state)
 | 
			
		||||
        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base),
 | 
			
		||||
         "i"(offsetof(CABACContext, bytestream))
 | 
			
		||||
         "+&r"(c->range), "=&r"(state), "+m"(c->bytestream)
 | 
			
		||||
        :"m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base)
 | 
			
		||||
        : "%"REG_c, "memory"
 | 
			
		||||
    );
 | 
			
		||||
    return coeff_count;
 | 
			
		||||
}
 | 
			
		||||
#endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
 | 
			
		||||
#endif /* HAVE_6REGS && !defined(BROKEN_RELOCATIONS) */
 | 
			
		||||
 | 
			
		||||
#endif /* AVCODEC_X86_H264_I386_H */
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user