ffmpeg/libavcodec/cbrt_tablegen.h
Ganesh Ajjanagadde 07a11ebcab lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.

Tables are identical to previous ones.

Tested with FATE with/without --enable-hardcoded-tables.

Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit,       1 runs,      0 skips
7777490 decicycles in cbrt_tableinit,       2 runs,      0 skips
[...]
7582339 decicycles in cbrt_tableinit,     256 runs,      0 skips
7563556 decicycles in cbrt_tableinit,     512 runs,      0 skips

new:
2099480 decicycles in cbrt_tableinit,       1 runs,      0 skips
2044470 decicycles in cbrt_tableinit,       2 runs,      0 skips
[...]
1796544 decicycles in cbrt_tableinit,     256 runs,      0 skips
1791631 decicycles in cbrt_tableinit,     512 runs,      0 skips

Both small and large run counts are given: since this function is called only
once, the small run counts may give a better picture. The small-count numbers
are fairly consistent, and there is a steady downward trend from small to large
run counts until the timing stabilizes at a new value.

Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:20:38 -05:00

84 lines
2.4 KiB
C

/*
* Header file for hardcoded AAC cube-root table
*
* Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_CBRT_TABLEGEN_H
#define AVCODEC_CBRT_TABLEGEN_H
#include <stdint.h>
#include <math.h>
#include "libavutil/attributes.h"
#include "libavutil/intfloat.h"
#include "libavcodec/aac_defines.h"
/*
 * CBRT(x): convert a table value computed in double precision into the
 * 32-bit representation that is stored in the table.
 */
#if USE_FIXED
/* Fixed-point build: scale by 8192 (1 << 13) and round to an integer with lrint(). */
#define CBRT(x) lrint((x) * 8192)
#else
/*
 * Floating-point build: narrow to float and pass the result through
 * av_float2int() (declared in libavutil/intfloat.h; presumably yields the
 * float's raw 32-bit pattern -- confirm there).
 */
#define CBRT(x) av_float2int((float)(x))
#endif
#if CONFIG_HARDCODED_TABLES
/*
 * CONFIG_HARDCODED_TABLES build: the init call is defined as an empty macro,
 * so calling it does nothing, and a tables header is included instead
 * (presumably the pre-generated table data -- its contents are not visible
 * from this file).
 */
#if USE_FIXED
/* Fixed-point variant: empty init macro plus the fixed-point tables header. */
#define cbrt_tableinit_fixed()
#include "libavcodec/cbrt_fixed_tables.h"
#else
/* Floating-point variant: empty init macro plus the float tables header. */
#define cbrt_tableinit()
#include "libavcodec/cbrt_tables.h"
#endif
#else
/*
 * Runtime-generated table with 1 << 13 (8192) entries. It starts out
 * zero-initialised (static storage) and is filled by the table init
 * function below with CBRT()-encoded values.
 */
static uint32_t cbrt_tab[1 << 13];
static av_cold void AAC_RENAME(cbrt_tableinit)(void)
{
static double cbrt_tab_dbl[1 << 13];
if (!cbrt_tab[(1<<13) - 1]) {
int i, j, k;
double cbrt_val;
for (i = 1; i < 1<<13; i++)
cbrt_tab_dbl[i] = 1;
/* have to take care of non-squarefree numbers */
for (i = 2; i < 90; i++) {
if (cbrt_tab_dbl[i] == 1) {
cbrt_val = i * cbrt(i);
for (k = i; k < 1<<13; k *= i)
for (j = k; j < 1<<13; j += k)
cbrt_tab_dbl[j] *= cbrt_val;
}
}
for (i = 91; i <= 8191; i+= 2) {
if (cbrt_tab_dbl[i] == 1) {
cbrt_val = i * cbrt(i);
for (j = i; j < 1<<13; j += i)
cbrt_tab_dbl[j] *= cbrt_val;
}
}
for (i = 0; i < 1<<13; i++)
cbrt_tab[i] = CBRT(cbrt_tab_dbl[i]);
}
}
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AVCODEC_CBRT_TABLEGEN_H */