Based on the aarch64 asm. CPU cycle counts on cortex-a9 compared to gcc 4.8.2: before: 475 decicycles in get_cabac_noinline, 67106035 runs, 2829 skips; after: 393 decicycles in get_cabac_noinline, 67106474 runs, 2390 skips. The overall speedup is above 2%. Code generated by clang 3.4 is slower on the same hardware, and the relative change is a little larger.
		
			
				
	
	
		
			191 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			191 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 | 
						|
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 | 
						|
 *
 | 
						|
 * This file is part of Libav.
 | 
						|
 *
 | 
						|
 * Libav is free software; you can redistribute it and/or
 | 
						|
 * modify it under the terms of the GNU Lesser General Public
 | 
						|
 * License as published by the Free Software Foundation; either
 | 
						|
 * version 2.1 of the License, or (at your option) any later version.
 | 
						|
 *
 | 
						|
 * Libav is distributed in the hope that it will be useful,
 | 
						|
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
 * Lesser General Public License for more details.
 | 
						|
 *
 | 
						|
 * You should have received a copy of the GNU Lesser General Public
 | 
						|
 * License along with Libav; if not, write to the Free Software
 | 
						|
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
						|
 */
 | 
						|
 | 
						|
/**
 | 
						|
 * @file
 | 
						|
 * Context Adaptive Binary Arithmetic Coder inline functions
 | 
						|
 */
 | 
						|
 | 
						|
#ifndef AVCODEC_CABAC_FUNCTIONS_H
 | 
						|
#define AVCODEC_CABAC_FUNCTIONS_H
 | 
						|
 | 
						|
#include <stdint.h>
 | 
						|
 | 
						|
#include "cabac.h"
 | 
						|
#include "config.h"
 | 
						|
 | 
						|
#if ARCH_AARCH64
 | 
						|
#   include "aarch64/cabac.h"
 | 
						|
#endif
 | 
						|
#if ARCH_ARM
 | 
						|
#   include "arm/cabac.h"
 | 
						|
#endif
 | 
						|
#if ARCH_X86
 | 
						|
#   include "x86/cabac.h"
 | 
						|
#endif
 | 
						|
 | 
						|
static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
 | 
						|
static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
 | 
						|
static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
 | 
						|
static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET;
 | 
						|
 | 
						|
/**
 * Feed the next CABAC_BITS / 8 input byte(s) into c->low.
 *
 * The new bits are added shifted left by one, then CABAC_MASK is subtracted.
 * The read pointer only advances while it is still below c->bytestream_end;
 * the byte(s) themselves are read before that check is made.
 * NOTE(review): presumably the input buffer is padded so that this read is
 * always valid - confirm against the code that sets up the context.
 */
static void refill(CABACContext *c)
{
    const uint8_t *src = c->bytestream;
#if CABAC_BITS == 16
    const int fresh = (src[0] << 9) + (src[1] << 1);
#else
    const int fresh = src[0] << 1;
#endif

    c->low += fresh;
    c->low -= CABAC_MASK;

    if (c->bytestream < c->bytestream_end) {
        c->bytestream += CABAC_BITS / 8;
    }
}
 | 
						|
 | 
						|
/**
 * Perform at most one renormalization step.
 *
 * Range and low are shifted left by one bit when c->range is below 0x100,
 * and left untouched otherwise. If the CABAC_MASK bits of low are all zero
 * afterwards, refill() is called.
 */
static inline void renorm_cabac_decoder_once(CABACContext *c)
{
    /* Sign bit of (range - 0x100): 1 when range < 0x100, else 0. */
    const int step = (uint32_t)(c->range - 0x100) >> 31;

    c->range <<= step;
    c->low   <<= step;

    if ((c->low & CABAC_MASK) == 0) {
        refill(c);
    }
}
 | 
						|
 | 
						|
#ifndef get_cabac_inline
 | 
						|
/**
 * Refill c->low after a renormalization that may have shifted by several
 * bits at once.
 *
 * The new input bits (minus CABAC_MASK) are shifted into position before
 * being added to low. The shift amount is derived from the lowest set bit
 * of low through the ff_h264_norm_shift table.
 * The read pointer only advances while it is still below c->bytestream_end;
 * the byte(s) themselves are read before that check is made.
 */
static void refill2(CABACContext *c)
{
    /* All bits up to and including the lowest set bit of low. */
    const int low_bits = c->low ^ (c->low - 1);
    const int shift    = 7 - ff_h264_norm_shift[low_bits >> (CABAC_BITS - 1)];
    int fill           = -CABAC_MASK;

#if CABAC_BITS == 16
    fill += (c->bytestream[0] << 9) + (c->bytestream[1] << 1);
#else
    fill += c->bytestream[0] << 1;
#endif

    c->low += fill << shift;

    if (c->bytestream < c->bytestream_end) {
        c->bytestream += CABAC_BITS / 8;
    }
}
 | 
						|
 | 
						|
/**
 * Decode one bin with the adaptive context stored at *state.
 *
 * The context byte is replaced by its successor from ff_h264_mlps_state and
 * range/low are renormalized (with a refill2() when needed).
 *
 * @param c     decoder context
 * @param state context byte, read and then overwritten
 * @return the low bit of the context value after it has been XORed with the
 *         LPS mask
 */
static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state)
{
    int ctx             = *state;
    const int lps_range = ff_h264_lps_range[2 * (c->range & 0xC0) + ctx];
    int scaled, lps_mask, norm, bit;

    c->range -= lps_range;
    scaled    = c->range << (CABAC_BITS + 1);

    /*
     * -1 when low exceeds the scaled range (LPS path), 0 otherwise.
     * Relies on >> 31 replicating the sign bit of a 32-bit int.
     */
    lps_mask = (scaled - c->low) >> 31;

    /* Branch-free select: the masked terms vanish on the MPS path. */
    c->low   -= scaled & lps_mask;
    c->range += (lps_range - c->range) & lps_mask;

    ctx   ^= lps_mask;
    *state = ff_h264_mlps_state[128 + ctx];
    bit    = ctx & 1;

    /* Renormalize by the table-provided shift for the new range. */
    norm       = ff_h264_norm_shift[c->range];
    c->range <<= norm;
    c->low   <<= norm;

    if ((c->low & CABAC_MASK) == 0) {
        refill2(c);
    }

    return bit;
}
 | 
						|
#endif
 | 
						|
 | 
						|
/**
 * Out-of-line wrapper around get_cabac_inline().
 *
 * @param c     decoder context
 * @param state context byte passed through to get_cabac_inline()
 * @return the value returned by get_cabac_inline()
 */
static int av_noinline av_unused get_cabac_noinline(CABACContext *c, uint8_t * const state)
{
    const int bit = get_cabac_inline(c, state);

    return bit;
}
 | 
						|
 | 
						|
/**
 * Decode one bin; forwards directly to get_cabac_inline().
 *
 * @param c     decoder context
 * @param state context byte passed through to get_cabac_inline()
 * @return the value returned by get_cabac_inline()
 */
static int av_unused get_cabac(CABACContext *c, uint8_t * const state)
{
    const int bit = get_cabac_inline(c, state);

    return bit;
}
 | 
						|
 | 
						|
#ifndef get_cabac_bypass
 | 
						|
/**
 * Decode one bypass bin (no context byte is involved).
 *
 * low is doubled, refilled if its CABAC_MASK bits are all zero, and then
 * compared against range scaled by CABAC_BITS + 1 bits.
 *
 * @return 0 if low is below the scaled range; otherwise 1, after the scaled
 *         range has been subtracted from low
 */
static int av_unused get_cabac_bypass(CABACContext *c)
{
    int scaled;

    c->low += c->low;

    if ((c->low & CABAC_MASK) == 0) {
        refill(c);
    }

    scaled = c->range << (CABAC_BITS + 1);
    if (c->low < scaled) {
        return 0;
    }

    c->low -= scaled;
    return 1;
}
 | 
						|
#endif
 | 
						|
 | 
						|
#ifndef get_cabac_bypass_sign
 | 
						|
/**
 * Decode one bypass bin and use it to choose the sign of val, branch-free.
 *
 * @param c   decoder context
 * @param val value to return, either as is or negated
 * @return -val when low is below the scaled range after doubling/refill,
 *         val otherwise
 */
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val)
{
    int scaled, sign;

    c->low += c->low;

    if ((c->low & CABAC_MASK) == 0) {
        refill(c);
    }

    scaled  = c->range << (CABAC_BITS + 1);
    c->low -= scaled;

    /*
     * -1 if the subtraction made low negative, 0 otherwise.
     * Relies on >> 31 replicating the sign bit of a 32-bit int.
     */
    sign = c->low >> 31;

    /* Undo the subtraction only when low went negative. */
    c->low += scaled & sign;

    /* (val ^ -1) - (-1) == -val;  (val ^ 0) - 0 == val. */
    return (val ^ sign) - sign;
}
 | 
						|
#endif
 | 
						|
 | 
						|
/**
 | 
						|
 *
 | 
						|
 * @return the number of bytes read or 0 if no end
 | 
						|
 */
 | 
						|
static int av_unused get_cabac_terminate(CABACContext *c){
 | 
						|
    c->range -= 2;
 | 
						|
    if(c->low < c->range<<(CABAC_BITS+1)){
 | 
						|
        renorm_cabac_decoder_once(c);
 | 
						|
        return 0;
 | 
						|
    }else{
 | 
						|
        return c->bytestream - c->bytestream_start;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * Skip @p n bytes and reset the decoder.
 | 
						|
 * @return the address of the first skipped byte or NULL if there's less than @p n bytes left
 | 
						|
 */
 | 
						|
static av_unused const uint8_t* skip_bytes(CABACContext *c, int n) {
 | 
						|
    const uint8_t *ptr = c->bytestream;
 | 
						|
 | 
						|
    if (c->low & 0x1)
 | 
						|
        ptr--;
 | 
						|
#if CABAC_BITS == 16
 | 
						|
    if (c->low & 0x1FF)
 | 
						|
        ptr--;
 | 
						|
#endif
 | 
						|
    if ((int) (c->bytestream_end - ptr) < n)
 | 
						|
        return NULL;
 | 
						|
    ff_init_cabac_decoder(c, ptr + n, c->bytestream_end - ptr - n);
 | 
						|
 | 
						|
    return ptr;
 | 
						|
}
 | 
						|
 | 
						|
#endif /* AVCODEC_CABAC_FUNCTIONS_H */
 |