ff_huff_build_tree uses qsort underneath. AV_QSORT is substantially faster because the comparison callback is inlined. Furthermore, this code is reasonably performance-critical: in the fraps codec, for example, ff_huff_build_tree is called on every frame. This routine is also called in vp6 on every frame in some circumstances.

Sample benchmark (x86-64, Haswell, GNU/Linux), vp6 from FATE:

vp6 (old):
78930 decicycles in qsort, 1 runs, 0 skips
45330 decicycles in qsort, 2 runs, 0 skips
27825 decicycles in qsort, 4 runs, 0 skips
17471 decicycles in qsort, 8 runs, 0 skips
12296 decicycles in qsort, 16 runs, 0 skips
9554 decicycles in qsort, 32 runs, 0 skips
8404 decicycles in qsort, 64 runs, 0 skips
7405 decicycles in qsort, 128 runs, 0 skips
6740 decicycles in qsort, 256 runs, 0 skips
7540 decicycles in qsort, 512 runs, 0 skips
9498 decicycles in qsort, 1024 runs, 0 skips
9938 decicycles in qsort, 2048 runs, 0 skips
8043 decicycles in qsort, 4095 runs, 1 skips

vp6 (new):
15880 decicycles in qsort, 1 runs, 0 skips
10730 decicycles in qsort, 2 runs, 0 skips
10155 decicycles in qsort, 4 runs, 0 skips
7805 decicycles in qsort, 8 runs, 0 skips
6883 decicycles in qsort, 16 runs, 0 skips
6305 decicycles in qsort, 32 runs, 0 skips
5854 decicycles in qsort, 64 runs, 0 skips
5152 decicycles in qsort, 128 runs, 0 skips
4452 decicycles in qsort, 256 runs, 0 skips
4161 decicycles in qsort, 511 runs, 1 skips
4081 decicycles in qsort, 1023 runs, 1 skips
4072 decicycles in qsort, 2047 runs, 1 skips
4004 decicycles in qsort, 4095 runs, 1 skips

Reviewed-by: Timothy Gu <timothygu99@gmail.com>
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
		
			
				
	
	
		
			199 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			199 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * Copyright (c) 2006 Konstantin Shishkov
 | 
						|
 * Copyright (c) 2007 Loren Merritt
 | 
						|
 *
 | 
						|
 * This file is part of FFmpeg.
 | 
						|
 *
 | 
						|
 * FFmpeg is free software; you can redistribute it and/or
 | 
						|
 * modify it under the terms of the GNU Lesser General Public
 | 
						|
 * License as published by the Free Software Foundation; either
 | 
						|
 * version 2.1 of the License, or (at your option) any later version.
 | 
						|
 *
 | 
						|
 * FFmpeg is distributed in the hope that it will be useful,
 | 
						|
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
 * Lesser General Public License for more details.
 | 
						|
 *
 | 
						|
 * You should have received a copy of the GNU Lesser General Public
 | 
						|
 * License along with FFmpeg; if not, write to the Free Software
 | 
						|
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
						|
 */
 | 
						|
 | 
						|
/**
 | 
						|
 * @file
 | 
						|
 * huffman tree builder and VLC generator
 | 
						|
 */
 | 
						|
 | 
						|
#include <stdint.h>
 | 
						|
 | 
						|
#include "libavutil/qsort.h"
 | 
						|
#include "avcodec.h"
 | 
						|
#include "get_bits.h"
 | 
						|
#include "huffman.h"
 | 
						|
 | 
						|
/* Symbol value that marks an internal (non-leaf) Huffman tree node.
 * Parenthesized so the expansion stays a single operand in any expression. */
#define HNODE (-1)
 | 
						|
 | 
						|
/**
 * One entry of the min-heap used while computing code lengths.
 */
typedef struct HeapElem {
    uint64_t val;   /* weight the heap is ordered by (smallest at the root) */
    int      name;  /* index of the tree node this entry currently stands for */
} HeapElem;
 | 
						|
 | 
						|
/**
 * Restore the min-heap property below one position.
 *
 * Starting at index root, the element is repeatedly swapped with its smaller
 * child until neither child has a smaller val or the element has no children.
 *
 * @param h    heap storage, ordered by HeapElem.val
 * @param root index of the element to sift down
 * @param size number of valid elements in h
 */
static void heap_sift(HeapElem *h, int root, int size)
{
    for (;;) {
        int left = 2 * root + 1;
        int smaller;

        /* no children left: the element has reached a leaf position */
        if (left >= size)
            return;

        /* pick the right child only when it exists and is strictly smaller */
        smaller = left;
        if (left + 1 < size && h[left].val > h[left + 1].val)
            smaller = left + 1;

        /* parent is already not larger than its smallest child: done */
        if (!(h[root].val > h[smaller].val))
            return;

        FFSWAP(HeapElem, h[root], h[smaller]);
        root = smaller;
    }
}
 | 
						|
 | 
						|
int ff_huff_gen_len_table(uint8_t *dst, const uint64_t *stats, int stats_size, int skip0)
 | 
						|
{
 | 
						|
    HeapElem *h  = av_malloc_array(sizeof(*h), stats_size);
 | 
						|
    int *up      = av_malloc_array(sizeof(*up) * 2, stats_size);
 | 
						|
    uint8_t *len = av_malloc_array(sizeof(*len) * 2, stats_size);
 | 
						|
    uint16_t *map= av_malloc_array(sizeof(*map), stats_size);
 | 
						|
    int offset, i, next;
 | 
						|
    int size = 0;
 | 
						|
    int ret = 0;
 | 
						|
 | 
						|
    if (!h || !up || !len || !map) {
 | 
						|
        ret = AVERROR(ENOMEM);
 | 
						|
        goto end;
 | 
						|
    }
 | 
						|
 | 
						|
    for (i = 0; i<stats_size; i++) {
 | 
						|
        dst[i] = 255;
 | 
						|
        if (stats[i] || !skip0)
 | 
						|
            map[size++] = i;
 | 
						|
    }
 | 
						|
 | 
						|
    for (offset = 1; ; offset <<= 1) {
 | 
						|
        for (i=0; i < size; i++) {
 | 
						|
            h[i].name = i;
 | 
						|
            h[i].val = (stats[map[i]] << 14) + offset;
 | 
						|
        }
 | 
						|
        for (i = size / 2 - 1; i >= 0; i--)
 | 
						|
            heap_sift(h, i, size);
 | 
						|
 | 
						|
        for (next = size; next < size * 2 - 1; next++) {
 | 
						|
            // merge the two smallest entries, and put it back in the heap
 | 
						|
            uint64_t min1v = h[0].val;
 | 
						|
            up[h[0].name] = next;
 | 
						|
            h[0].val = INT64_MAX;
 | 
						|
            heap_sift(h, 0, size);
 | 
						|
            up[h[0].name] = next;
 | 
						|
            h[0].name = next;
 | 
						|
            h[0].val += min1v;
 | 
						|
            heap_sift(h, 0, size);
 | 
						|
        }
 | 
						|
 | 
						|
        len[2 * size - 2] = 0;
 | 
						|
        for (i = 2 * size - 3; i >= size; i--)
 | 
						|
            len[i] = len[up[i]] + 1;
 | 
						|
        for (i = 0; i < size; i++) {
 | 
						|
            dst[map[i]] = len[up[i]] + 1;
 | 
						|
            if (dst[map[i]] >= 32) break;
 | 
						|
        }
 | 
						|
        if (i==size) break;
 | 
						|
    }
 | 
						|
end:
 | 
						|
    av_free(h);
 | 
						|
    av_free(up);
 | 
						|
    av_free(len);
 | 
						|
    av_free(map);
 | 
						|
    return ret;
 | 
						|
}
 | 
						|
 | 
						|
static void get_tree_codes(uint32_t *bits, int16_t *lens, uint8_t *xlat,
 | 
						|
                           Node *nodes, int node,
 | 
						|
                           uint32_t pfx, int pl, int *pos, int no_zero_count)
 | 
						|
{
 | 
						|
    int s;
 | 
						|
 | 
						|
    s = nodes[node].sym;
 | 
						|
    if (s != HNODE || (no_zero_count && !nodes[node].count)) {
 | 
						|
        bits[*pos] = pfx;
 | 
						|
        lens[*pos] = pl;
 | 
						|
        xlat[*pos] = s;
 | 
						|
        (*pos)++;
 | 
						|
    } else {
 | 
						|
        pfx <<= 1;
 | 
						|
        pl++;
 | 
						|
        get_tree_codes(bits, lens, xlat, nodes, nodes[node].n0, pfx, pl,
 | 
						|
                       pos, no_zero_count);
 | 
						|
        pfx |= 1;
 | 
						|
        get_tree_codes(bits, lens, xlat, nodes, nodes[node].n0 + 1, pfx, pl,
 | 
						|
                       pos, no_zero_count);
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/**
 * Collect the codes of the tree rooted at nodes[head] and initialise vlc
 * from them.
 *
 * @param vlc     VLC to initialise
 * @param nodes   tree node array
 * @param head    index of the root node
 * @param flags   FF_HUFFMAN_FLAG_* bits; unless FF_HUFFMAN_FLAG_ZERO_COUNT is
 *                set, zero-count internal nodes are emitted as leaves
 * @param nb_bits passed through to ff_init_vlc_sparse()
 * @return the return value of ff_init_vlc_sparse()
 */
static int build_huff_tree(VLC *vlc, Node *nodes, int head, int flags, int nb_bits)
{
    uint32_t codes[256];
    int16_t  code_lens[256];
    uint8_t  symbols[256];
    int nb_leaves = 0;
    const int no_zero_count = !(flags & FF_HUFFMAN_FLAG_ZERO_COUNT);

    get_tree_codes(codes, code_lens, symbols, nodes, head, 0, 0,
                   &nb_leaves, no_zero_count);

    /* each numeric pair equals the element size in bytes of the array
     * that precedes it */
    return ff_init_vlc_sparse(vlc, nb_bits, nb_leaves,
                              code_lens, 2, 2,
                              codes,     4, 4,
                              symbols,   1, 1,
                              0);
}
 | 
						|
 | 
						|
 | 
						|
/**
 | 
						|
 * nodes size must be 2*nb_codes
 | 
						|
 * first nb_codes nodes.count must be set
 | 
						|
 */
 | 
						|
int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes, int nb_bits,
 | 
						|
                       Node *nodes, HuffCmp cmp, int flags)
 | 
						|
{
 | 
						|
    int i, j;
 | 
						|
    int cur_node;
 | 
						|
    int64_t sum = 0;
 | 
						|
 | 
						|
    for (i = 0; i < nb_codes; i++) {
 | 
						|
        nodes[i].sym = i;
 | 
						|
        nodes[i].n0 = -2;
 | 
						|
        sum += nodes[i].count;
 | 
						|
    }
 | 
						|
 | 
						|
    if (sum >> 31) {
 | 
						|
        av_log(avctx, AV_LOG_ERROR,
 | 
						|
               "Too high symbol frequencies. "
 | 
						|
               "Tree construction is not possible\n");
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
    AV_QSORT(nodes, nb_codes, Node, cmp);
 | 
						|
    cur_node = nb_codes;
 | 
						|
    nodes[nb_codes*2-1].count = 0;
 | 
						|
    for (i = 0; i < nb_codes * 2 - 1; i += 2) {
 | 
						|
        uint32_t cur_count = nodes[i].count + nodes[i+1].count;
 | 
						|
        // find correct place to insert new node, and
 | 
						|
        // make space for the new node while at it
 | 
						|
        for(j = cur_node; j > i + 2; j--){
 | 
						|
            if(cur_count > nodes[j-1].count ||
 | 
						|
               (cur_count == nodes[j-1].count &&
 | 
						|
                !(flags & FF_HUFFMAN_FLAG_HNODE_FIRST)))
 | 
						|
                break;
 | 
						|
            nodes[j] = nodes[j - 1];
 | 
						|
        }
 | 
						|
        nodes[j].sym = HNODE;
 | 
						|
        nodes[j].count = cur_count;
 | 
						|
        nodes[j].n0 = i;
 | 
						|
        cur_node++;
 | 
						|
    }
 | 
						|
    if (build_huff_tree(vlc, nodes, nb_codes * 2 - 2, flags, nb_bits) < 0) {
 | 
						|
        av_log(avctx, AV_LOG_ERROR, "Error building tree\n");
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 |