include: reformat using new code style

Signed-off-by: Marcel Cornu <marcel.d.cornu@intel.com>
This commit is contained in:
Marcel Cornu
2024-04-19 17:09:27 +01:00
committed by Pablo de Lara
parent 55fbfabfc6
commit fa5b8baf84
10 changed files with 1013 additions and 890 deletions

View File

@@ -47,35 +47,39 @@ extern "C" {
#include <stdint.h>
#ifdef _MSC_VER
# define inline __inline
#define inline __inline
#endif
/* Make os-independent alignment attribute, alloc and free. */
#if defined __unix__ || defined __APPLE__
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline
# define aligned_free(x) free(x)
#if defined __unix__ || defined __APPLE__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#define __forceinline static inline
#define aligned_free(x) free(x)
#else
# ifdef __MINGW32__
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
# define aligned_free(x) _aligned_free(x)
# else
# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
# define aligned_free(x) _aligned_free(x)
# endif
#ifdef __MINGW32__
#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
#define posix_memalign(p, algn, len) \
(NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
#define aligned_free(x) _aligned_free(x)
#else
#define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
#define posix_memalign(p, algn, len) \
(NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn)))
#define aligned_free(x) _aligned_free(x)
#endif
#endif
#ifdef DEBUG
# define DEBUG_PRINT(x) printf x
#define DEBUG_PRINT(x) printf x
#else
# define DEBUG_PRINT(x) do {} while (0)
#define DEBUG_PRINT(x) \
do { \
} while (0)
#endif
/* Decide whether to use benchmark time as an approximation or a minimum. Fewer
* calls to the timer are required for the approximation case.*/
#define BENCHMARK_MIN_TIME 0
#define BENCHMARK_MIN_TIME 0
#define BENCHMARK_APPROX_TIME 1
#ifndef BENCHMARK_TYPE
#define BENCHMARK_TYPE BENCHMARK_MIN_TIME
@@ -86,228 +90,260 @@ extern "C" {
* standardized clock source. To obtain a meaningful result it may be
* necessary to fix the CPU clock to match the rtdsc tick rate.
*/
# include <inttypes.h>
# include <x86intrin.h>
# define USE_CYCLES
#include <inttypes.h>
#include <x86intrin.h>
#define USE_CYCLES
#else
# include <time.h>
#include <time.h>
#define USE_SECONDS
#endif
#ifdef USE_RDTSC
#ifndef BENCHMARK_TIME
# define BENCHMARK_TIME 6
#define BENCHMARK_TIME 6
#endif
# define GHZ 1000000000
# define UNIT_SCALE (GHZ)
# define CALIBRATE_TIME (UNIT_SCALE / 2)
static inline long long get_time(void) {
unsigned int dummy;
return __rdtscp(&dummy);
#define GHZ 1000000000
#define UNIT_SCALE (GHZ)
#define CALIBRATE_TIME (UNIT_SCALE / 2)
static inline long long
get_time(void)
{
unsigned int dummy;
return __rdtscp(&dummy);
}
static inline long long get_res(void) {
return 1;
static inline long long
get_res(void)
{
return 1;
}
#else
#ifndef BENCHMARK_TIME
# define BENCHMARK_TIME 3
#define BENCHMARK_TIME 3
#endif
#ifdef _MSC_VER
#define UNIT_SCALE get_res()
#define UNIT_SCALE get_res()
#define CALIBRATE_TIME (UNIT_SCALE / 4)
static inline long long get_time(void) {
long long ret = 0;
QueryPerformanceCounter(&ret);
return ret;
static inline long long
get_time(void)
{
long long ret = 0;
QueryPerformanceCounter(&ret);
return ret;
}
static inline long long get_res(void) {
long long ret = 0;
QueryPerformanceFrequency(&ret);
return ret;
static inline long long
get_res(void)
{
long long ret = 0;
QueryPerformanceFrequency(&ret);
return ret;
}
#else
# define NANO_SCALE 1000000000
# define UNIT_SCALE NANO_SCALE
# define CALIBRATE_TIME (UNIT_SCALE / 4)
#define NANO_SCALE 1000000000
#define UNIT_SCALE NANO_SCALE
#define CALIBRATE_TIME (UNIT_SCALE / 4)
#ifdef __FreeBSD__
# define CLOCK_ID CLOCK_MONOTONIC_PRECISE
#define CLOCK_ID CLOCK_MONOTONIC_PRECISE
#else
# define CLOCK_ID CLOCK_MONOTONIC
#define CLOCK_ID CLOCK_MONOTONIC
#endif
static inline long long get_time(void) {
struct timespec time;
long long nano_total;
clock_gettime(CLOCK_ID, &time);
nano_total = time.tv_sec;
nano_total *= NANO_SCALE;
nano_total += time.tv_nsec;
return nano_total;
static inline long long
get_time(void)
{
struct timespec time;
long long nano_total;
clock_gettime(CLOCK_ID, &time);
nano_total = time.tv_sec;
nano_total *= NANO_SCALE;
nano_total += time.tv_nsec;
return nano_total;
}
static inline long long get_res(void) {
struct timespec time;
long long nano_total;
clock_getres(CLOCK_ID, &time);
nano_total = time.tv_sec;
nano_total *= NANO_SCALE;
nano_total += time.tv_nsec;
return nano_total;
static inline long long
get_res(void)
{
struct timespec time;
long long nano_total;
clock_getres(CLOCK_ID, &time);
nano_total = time.tv_sec;
nano_total *= NANO_SCALE;
nano_total += time.tv_nsec;
return nano_total;
}
#endif
#endif
struct perf {
long long start;
long long stop;
long long run_total;
long long iterations;
long long start;
long long stop;
long long run_total;
long long iterations;
};
static inline void perf_init(struct perf *p) {
p->start = 0;
p->stop = 0;
p->run_total = 0;
static inline void
perf_init(struct perf *p)
{
p->start = 0;
p->stop = 0;
p->run_total = 0;
}
static inline void perf_continue(struct perf *p) {
p->start = get_time();
static inline void
perf_continue(struct perf *p)
{
p->start = get_time();
}
static inline void perf_pause(struct perf *p) {
p->stop = get_time();
p->run_total = p->run_total + p->stop - p->start;
p->start = p->stop;
static inline void
perf_pause(struct perf *p)
{
p->stop = get_time();
p->run_total = p->run_total + p->stop - p->start;
p->start = p->stop;
}
static inline void perf_start(struct perf *p) {
perf_init(p);
perf_continue(p);
static inline void
perf_start(struct perf *p)
{
perf_init(p);
perf_continue(p);
}
static inline void perf_stop(struct perf *p) {
perf_pause(p);
static inline void
perf_stop(struct perf *p)
{
perf_pause(p);
}
static inline double get_time_elapsed(struct perf *p) {
return 1.0 * p->run_total / UNIT_SCALE;
static inline double
get_time_elapsed(struct perf *p)
{
return 1.0 * p->run_total / UNIT_SCALE;
}
static inline long long get_base_elapsed(struct perf *p) {
return p->run_total;
static inline long long
get_base_elapsed(struct perf *p)
{
return p->run_total;
}
static inline unsigned long long estimate_perf_iterations(struct perf *p,
unsigned long long runs,
unsigned long long total) {
total = total * runs;
if (get_base_elapsed(p) > 0)
return (total + get_base_elapsed(p) - 1) / get_base_elapsed(p);
else
return (total + get_res() - 1) / get_res();
static inline unsigned long long
estimate_perf_iterations(struct perf *p, unsigned long long runs, unsigned long long total)
{
total = total * runs;
if (get_base_elapsed(p) > 0)
return (total + get_base_elapsed(p) - 1) / get_base_elapsed(p);
else
return (total + get_res() - 1) / get_res();
}
#define CALIBRATE(PERF, FUNC_CALL) { \
unsigned long long _i, _iter = 1; \
perf_start(PERF); \
FUNC_CALL; \
perf_pause(PERF); \
\
while (get_base_elapsed(PERF) < CALIBRATE_TIME) { \
_iter = estimate_perf_iterations(PERF, _iter, \
2 * CALIBRATE_TIME); \
perf_start(PERF); \
for (_i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_stop(PERF); \
} \
(PERF)->iterations=_iter; \
}
#define CALIBRATE(PERF, FUNC_CALL) \
{ \
unsigned long long _i, _iter = 1; \
perf_start(PERF); \
FUNC_CALL; \
perf_pause(PERF); \
\
while (get_base_elapsed(PERF) < CALIBRATE_TIME) { \
_iter = estimate_perf_iterations(PERF, _iter, 2 * CALIBRATE_TIME); \
perf_start(PERF); \
for (_i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_stop(PERF); \
} \
(PERF)->iterations = _iter; \
}
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) { \
unsigned long long _i, _iter = (PERF)->iterations; \
unsigned long long _run_total = RUN_TIME; \
_run_total *= UNIT_SCALE; \
_iter = estimate_perf_iterations(PERF, _iter, _run_total);\
(PERF)->iterations = 0; \
perf_start(PERF); \
for (_i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_pause(PERF); \
(PERF)->iterations += _iter; \
\
if(get_base_elapsed(PERF) < _run_total && \
BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
_iter = estimate_perf_iterations(PERF, _iter, \
_run_total - get_base_elapsed(PERF) + \
(UNIT_SCALE / 16)); \
perf_continue(PERF); \
for (_i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_pause(PERF); \
(PERF)->iterations += _iter; \
} \
}
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) \
{ \
unsigned long long _i, _iter = (PERF)->iterations; \
unsigned long long _run_total = RUN_TIME; \
_run_total *= UNIT_SCALE; \
_iter = estimate_perf_iterations(PERF, _iter, _run_total); \
(PERF)->iterations = 0; \
perf_start(PERF); \
for (_i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_pause(PERF); \
(PERF)->iterations += _iter; \
\
if (get_base_elapsed(PERF) < _run_total && BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
_iter = estimate_perf_iterations(PERF, _iter, \
_run_total - get_base_elapsed(PERF) + \
(UNIT_SCALE / 16)); \
perf_continue(PERF); \
for (_i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_pause(PERF); \
(PERF)->iterations += _iter; \
} \
}
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) { \
if((RUN_TIME) > 0) { \
CALIBRATE(PERF, FUNC_CALL); \
PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \
\
} else { \
(PERF)->iterations = 1; \
perf_start(PERF); \
FUNC_CALL; \
perf_stop(PERF); \
} \
}
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) \
{ \
if ((RUN_TIME) > 0) { \
CALIBRATE(PERF, FUNC_CALL); \
PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \
\
} else { \
(PERF)->iterations = 1; \
perf_start(PERF); \
FUNC_CALL; \
perf_stop(PERF); \
} \
}
#ifdef USE_CYCLES
static inline void perf_print(struct perf p, long long unit_count) {
long long total_units = p.iterations * unit_count;
static inline void
perf_print(struct perf p, long long unit_count)
{
long long total_units = p.iterations * unit_count;
printf("runtime = %10lld ticks", get_base_elapsed(&p));
if (total_units != 0) {
printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte",
total_units / (1000000), get_time_elapsed(&p),
get_base_elapsed(&p) / (double)total_units);
}
printf("\n");
printf("runtime = %10lld ticks", get_base_elapsed(&p));
if (total_units != 0) {
printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", total_units / (1000000),
get_time_elapsed(&p), get_base_elapsed(&p) / (double) total_units);
}
printf("\n");
}
#else
static inline void perf_print(struct perf p, double unit_count) {
long long total_units = p.iterations * unit_count;
long long usecs = (long long)(get_time_elapsed(&p) * 1000000);
static inline void
perf_print(struct perf p, double unit_count)
{
long long total_units = p.iterations * unit_count;
long long usecs = (long long) (get_time_elapsed(&p) * 1000000);
printf("runtime = %10lld usecs", usecs);
if (total_units != 0) {
printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s",
total_units / (1000000), get_time_elapsed(&p),
((double)total_units) / (1000000 * get_time_elapsed(&p)));
}
printf("\n");
printf("runtime = %10lld usecs", usecs);
if (total_units != 0) {
printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", total_units / (1000000),
get_time_elapsed(&p),
((double) total_units) / (1000000 * get_time_elapsed(&p)));
}
printf("\n");
}
#endif
static inline uint64_t get_filesize(FILE * fp) {
uint64_t file_size;
fpos_t pos, pos_curr;
static inline uint64_t
get_filesize(FILE *fp)
{
uint64_t file_size;
fpos_t pos, pos_curr;
fgetpos(fp, &pos_curr); /* Save current position */
fgetpos(fp, &pos_curr); /* Save current position */
#if defined(_WIN32) || defined(_WIN64)
_fseeki64(fp, 0, SEEK_END);
_fseeki64(fp, 0, SEEK_END);
#else
fseeko(fp, 0, SEEK_END);
fseeko(fp, 0, SEEK_END);
#endif
fgetpos(fp, &pos);
file_size = *(uint64_t *) & pos;
fsetpos(fp, &pos_curr); /* Restore position */
fgetpos(fp, &pos);
file_size = *(uint64_t *) &pos;
fsetpos(fp, &pos_curr); /* Restore position */
return file_size;
return file_size;
}
#ifdef __cplusplus