From 297b27655eaf4a0cfd097b2ae94add4b19fed9eb Mon Sep 17 00:00:00 2001 From: Attila Nagy Date: Fri, 25 Mar 2011 12:53:03 +0200 Subject: [PATCH] Runtime detection of available processor cores. Detect the number of available cores and limit the thread allocation accordingly. On the decoder side limit the number of threads to the max number of token partitions. Core detection works on Windows and Posix platforms, which define _SC_NPROCESSORS_ONLN or _SC_NPROC_ONLN. Change-Id: I76cbe37c18d3b8035e508b7a1795577674efc078 --- build/make/configure.sh | 3 ++ configure | 1 + vp8/common/generic/systemdependent.c | 48 ++++++++++++++++++++++++++++ vp8/common/onyxc_int.h | 3 ++ vp8/decoder/threading.c | 10 ++++-- vp8/encoder/ethreading.c | 8 ++--- vp8/encoder/onyx_int.h | 1 - 7 files changed, 67 insertions(+), 7 deletions(-) diff --git a/build/make/configure.sh b/build/make/configure.sh index a48fd9faf..cdd7b9ce4 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -979,6 +979,9 @@ EOF esac fi + # for sysconf(3) and friends. 
+ check_header unistd.h + # glibc needs these if enabled linux; then add_cflags -D_LARGEFILE_SOURCE diff --git a/configure b/configure index f92ffc5ea..c6805b0af 100755 --- a/configure +++ b/configure @@ -211,6 +211,7 @@ HAVE_LIST=" alt_tree_layout pthread_h sys_mman_h + unistd_h " CONFIG_LIST=" external_build diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index 5c6464772..4131d3c3c 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -17,9 +17,54 @@ #include "vp8/common/idct.h" #include "vp8/common/onyxc_int.h" +#if CONFIG_MULTITHREAD +#if HAVE_UNISTD_H +#include +#elif defined(_WIN32) +#include +typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO); +#endif +#endif + extern void vp8_arch_x86_common_init(VP8_COMMON *ctx); extern void vp8_arch_arm_common_init(VP8_COMMON *ctx); +#if CONFIG_MULTITHREAD +static int get_cpu_count() +{ + int core_count = 16; + +#if HAVE_UNISTD_H +#if defined(_SC_NPROCESSORS_ONLN) + core_count = sysconf(_SC_NPROCESSORS_ONLN); +#elif defined(_SC_NPROC_ONLN) + core_count = sysconf(_SC_NPROC_ONLN); +#endif +#elif defined(_WIN32) + { + PGNSI pGNSI; + SYSTEM_INFO sysinfo; + + /* Call GetNativeSystemInfo if supported or + * GetSystemInfo otherwise. */ + + pGNSI = (PGNSI) GetProcAddress( + GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); + if (pGNSI != NULL) + pGNSI(&sysinfo); + else + GetSystemInfo(&sysinfo); + + core_count = sysinfo.dwNumberOfProcessors; + } +#else + /* other platforms */ +#endif + + return core_count > 0 ? 
core_count : 1; +} +#endif + void vp8_machine_specific_config(VP8_COMMON *ctx) { #if CONFIG_RUNTIME_CPU_DETECT @@ -82,4 +127,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) vp8_arch_arm_common_init(ctx); #endif +#if CONFIG_MULTITHREAD + ctx->processor_core_count = get_cpu_count(); +#endif /* CONFIG_MULTITHREAD */ } diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index c8c227787..60737e40c 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -195,6 +195,9 @@ typedef struct VP8Common #if CONFIG_RUNTIME_CPU_DETECT VP8_COMMON_RTCD rtcd; +#endif +#if CONFIG_MULTITHREAD + int processor_core_count; #endif struct postproc_state postproc_state; } VP8_COMMON; diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 3d9d428ef..4cc7f534d 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -429,12 +429,18 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) pbi->b_multithreaded_rd = 0; pbi->allocated_decoding_thread_count = 0; - core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads; + + /* limit decoding threads to the max number of token partitions */ + core_count = (pbi->max_threads > 8) ? 
8 : pbi->max_threads; + + /* limit decoding threads to the available cores */ + if (core_count > pbi->common.processor_core_count) + core_count = pbi->common.processor_core_count; if (core_count > 1) { pbi->b_multithreaded_rd = 1; - pbi->decoding_thread_count = core_count -1; + pbi->decoding_thread_count = core_count - 1; CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count)); CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count)); diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 71da1036a..fa6a943d1 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -459,15 +459,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi) cpi->b_multi_threaded = 0; cpi->encoding_thread_count = 0; - cpi->processor_core_count = 32; //vp8_get_proc_core_count(); - if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) + if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) { int ithread; int th_count = cpi->oxcf.multi_threaded - 1; - if (cpi->oxcf.multi_threaded > cpi->processor_core_count) - th_count = cpi->processor_core_count - 1; + /* don't allocate more threads than cores available */ + if (cpi->oxcf.multi_threaded > cm->processor_core_count) + th_count = cm->processor_core_count - 1; /* we have th_count + 1 (main) threads processing one row each */ /* no point to have more threads than the sync range allows */ diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 0e53f6803..7282ef8ff 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -594,7 +594,6 @@ typedef struct // multithread data int * mt_current_mb_col; int mt_sync_range; - int processor_core_count; int b_multi_threaded; int encoding_thread_count;