Merge remote branch 'internal/upstream' into HEAD

This commit is contained in:
John Koleszar 2011-05-06 00:05:10 -04:00
commit 39e36f8604
12 changed files with 91 additions and 64 deletions

View File

@ -980,6 +980,9 @@ EOF
esac
fi
# for sysconf(3) and friends.
check_header unistd.h
# glibc needs these
if enabled linux; then
add_cflags -D_LARGEFILE_SOURCE

1
configure vendored
View File

@ -211,6 +211,7 @@ HAVE_LIST="
alt_tree_layout
pthread_h
sys_mman_h
unistd_h
"
EXPERIMENT_LIST="
extend_qrange

View File

@ -308,7 +308,6 @@
; q9 q2
; q10 q3
|vp8_loop_filter_neon| PROC
ldr r12, _lf_coeff_
; vp8_filter_mask
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
@ -339,7 +338,7 @@
vqadd.u8 q9, q9, q2 ; a = b + a
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
vld1.u8 {q0}, [r12]!
vmov.u8 q0, #0x80 ; 0x80
; vp8_filter() function
; convert to signed
@ -348,7 +347,7 @@
veor q5, q5, q0 ; ps1
veor q8, q8, q0 ; qs1
vld1.u8 {q10}, [r12]!
vmov.u8 q10, #3 ; #3
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
vsubl.s8 q11, d15, d13
@ -367,7 +366,7 @@
vaddw.s8 q2, q2, d2
vaddw.s8 q11, q11, d3
vld1.u8 {q9}, [r12]!
vmov.u8 q9, #4 ; #4
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d2, q2
@ -399,12 +398,4 @@
;-----------------
_lf_coeff_
DCD lf_coeff
lf_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
END

View File

@ -22,20 +22,19 @@
; r1 int p, //pitch
; r2 const signed char *flimit,
; r3 const signed char *limit,
; stack(r4) const signed char *thresh,
; stack(r4) const signed char *thresh (unused)
; //stack(r5) int count --unused
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
ldr r12, _lfhy_coeff_
vld1.u8 {q5}, [r0], r1 ; p1
vld1.s8 {d2[], d3[]}, [r2] ; flimit
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
vld1.u8 {q6}, [r0], r1 ; p0
vld1.u8 {q0}, [r12]! ; 0x80
vmov.u8 q0, #0x80 ; 0x80
vld1.u8 {q7}, [r0], r1 ; q0
vld1.u8 {q10}, [r12]! ; 0x03
vmov.u8 q10, #0x03 ; 0x03
vld1.u8 {q8}, [r0] ; q1
;vp8_filter_mask() function
@ -66,7 +65,7 @@
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
vadd.s16 q12, q3, q3
vld1.u8 {q9}, [r12]! ; 0x04
vmov.u8 q9, #0x04 ; 0x04
vadd.s16 q2, q2, q11
vadd.s16 q3, q3, q12
@ -105,11 +104,4 @@
;-----------------
_lfhy_coeff_
DCD lfhy_coeff
lfhy_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
END

View File

@ -22,7 +22,7 @@
; r1 int p, //pitch
; r2 const signed char *flimit,
; r3 const signed char *limit,
; stack(r4) const signed char *thresh,
; stack(r4) const signed char *thresh (unused)
; //stack(r5) int count --unused
|vp8_loop_filter_simple_vertical_edge_neon| PROC
@ -32,7 +32,6 @@
vld1.s8 {d2[], d3[]}, [r2] ; flimit
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
ldr r12, _vlfy_coeff_
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
@ -41,11 +40,11 @@
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
vld1.u8 {q0}, [r12]! ; 0x80
vmov.u8 q0, #0x80 ; 0x80
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
vld1.u8 {q11}, [r12]! ; 0x03
vmov.u8 q11, #0x03 ; 0x03
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
vld1.u8 {q12}, [r12]! ; 0x04
vmov.u8 q12, #0x04 ; 0x04
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
@ -146,11 +145,4 @@
;-----------------
_vlfy_coeff_
DCD vlfy_coeff
vlfy_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
END

View File

@ -372,7 +372,6 @@
; q10 q3
|vp8_mbloop_filter_neon| PROC
ldr r12, _mblf_coeff_
; vp8_filter_mask
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
@ -396,7 +395,7 @@
vld1.s8 {d4[], d5[]}, [r2] ; flimit
vld1.u8 {q0}, [r12]!
vmov.u8 q0, #0x80 ; 0x80
vadd.u8 q2, q2, q2 ; flimit * 2
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
@ -431,12 +430,12 @@
vadd.s16 q2, q2, q10
vadd.s16 q13, q13, q11
vld1.u8 {q12}, [r12]! ; #3
vmov.u8 q12, #3 ; #3
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q13, q13, d3
vld1.u8 {q11}, [r12]! ; #4
vmov.u8 q11, #4 ; #4
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d2, q2
@ -444,16 +443,16 @@
vand q1, q1, q15 ; vp8_filter &= mask
vld1.u8 {q15}, [r12]! ; #63
;
vmov.u16 q15, #63 ; #63
vand q13, q1, q14 ; Filter2 &= hev
vld1.u8 {d7}, [r12]! ; #9
vmov.u8 d7, #9 ; #9
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
vld1.u8 {d6}, [r12]! ; #18
vmov.u8 d6, #18 ; #18
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
@ -463,7 +462,7 @@
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
vld1.u8 {d5}, [r12]! ; #27
vmov.u8 d5, #27 ; #27
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
@ -507,14 +506,4 @@
;-----------------
_mblf_coeff_
DCD mblf_coeff
mblf_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
DCD 0x1b1b1b1b, 0x1b1b1b1b
END

View File

@ -17,9 +17,54 @@
#include "vp8/common/idct.h"
#include "vp8/common/onyxc_int.h"
#if CONFIG_MULTITHREAD
#if HAVE_UNISTD_H
#include <unistd.h>
#elif defined(_WIN32)
#include <windows.h>
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
#endif
#endif
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
#if CONFIG_MULTITHREAD
/*
 * Report the number of processor cores to use for multithreading.
 *
 * The query used is chosen at compile time:
 *   - HAVE_UNISTD_H: sysconf() with _SC_NPROCESSORS_ONLN, or with
 *     _SC_NPROC_ONLN when only that name is defined. If neither name is
 *     defined the built-in default is kept.
 *   - _WIN32: the dwNumberOfProcessors field of SYSTEM_INFO.
 *   - anything else: the built-in default of 16.
 *
 * Returns the detected count, or 1 whenever the value obtained is not
 * positive.
 */
static int get_cpu_count()
{
    /* Default reported when no platform query below is compiled in. */
    int ncpus = 16;

#if HAVE_UNISTD_H
#if defined(_SC_NPROCESSORS_ONLN)
    ncpus = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
    ncpus = sysconf(_SC_NPROC_ONLN);
#endif
#elif defined(_WIN32)
    {
        SYSTEM_INFO info;
        /* Look GetNativeSystemInfo up at run time; it is used when
         * kernel32.dll exports it, GetSystemInfo otherwise. */
        PGNSI native_query = (PGNSI) GetProcAddress(
            GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");

        if (native_query == NULL)
            GetSystemInfo(&info);
        else
            native_query(&info);

        ncpus = info.dwNumberOfProcessors;
    }
#else
    /* other platforms */
#endif

    /* Never report fewer than one core. */
    if (ncpus > 0)
        return ncpus;

    return 1;
}
#endif
void vp8_machine_specific_config(VP8_COMMON *ctx)
{
#if CONFIG_RUNTIME_CPU_DETECT
@ -98,4 +143,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
#endif
#if CONFIG_MULTITHREAD
ctx->processor_core_count = get_cpu_count();
#endif /* CONFIG_MULTITHREAD */
}

View File

@ -195,6 +195,9 @@ typedef struct VP8Common
#if CONFIG_RUNTIME_CPU_DETECT
VP8_COMMON_RTCD rtcd;
#endif
#if CONFIG_MULTITHREAD
int processor_core_count;
#endif
struct postproc_state postproc_state;
} VP8_COMMON;

View File

@ -439,12 +439,18 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
pbi->b_multithreaded_rd = 0;
pbi->allocated_decoding_thread_count = 0;
core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
/* limit decoding threads to the max number of token partitions */
core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
/* limit decoding threads to the available cores */
if (core_count > pbi->common.processor_core_count)
core_count = pbi->common.processor_core_count;
if (core_count > 1)
{
pbi->b_multithreaded_rd = 1;
pbi->decoding_thread_count = core_count -1;
pbi->decoding_thread_count = core_count - 1;
CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));

View File

@ -459,15 +459,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
cpi->b_multi_threaded = 0;
cpi->encoding_thread_count = 0;
cpi->processor_core_count = 32; //vp8_get_proc_core_count();
if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
{
int ithread;
int th_count = cpi->oxcf.multi_threaded - 1;
if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
th_count = cpi->processor_core_count - 1;
/* don't allocate more threads than cores available */
if (cpi->oxcf.multi_threaded > cm->processor_core_count)
th_count = cm->processor_core_count - 1;
/* we have th_count + 1 (main) threads processing one row each */
/* no point to have more threads than the sync range allows */
@ -514,6 +514,7 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;
sem_init(&cpi->h_event_start_lpf, 0, 0);
sem_init(&cpi->h_event_end_picklpf, 0, 0);
sem_init(&cpi->h_event_end_lpf, 0, 0);
lpfthd->ptr1 = (void *)cpi;
@ -547,6 +548,7 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
sem_destroy(&cpi->h_event_end_encoding);
sem_destroy(&cpi->h_event_end_lpf);
sem_destroy(&cpi->h_event_end_picklpf);
sem_destroy(&cpi->h_event_start_lpf);
//free thread related resources

View File

@ -3211,7 +3211,7 @@ void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded)
sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */
sem_post(&cpi->h_event_end_picklpf); /* signal that we have set filter_level */
#endif
if (cm->filter_level > 0)
@ -4221,7 +4221,7 @@ static void encode_frame_to_data_rate
#if CONFIG_MULTITHREAD
/* wait that filter_level is picked so that we can continue with stream packing */
if (cpi->b_multi_threaded)
sem_wait(&cpi->h_event_end_lpf);
sem_wait(&cpi->h_event_end_picklpf);
#endif
// build the bitstream

View File

@ -580,7 +580,6 @@ typedef struct
// multithread data
int * mt_current_mb_col;
int mt_sync_range;
int processor_core_count;
int b_multi_threaded;
int encoding_thread_count;
@ -595,6 +594,7 @@ typedef struct
sem_t *h_event_start_encoding;
sem_t h_event_end_encoding;
sem_t h_event_start_lpf;
sem_t h_event_end_picklpf;
sem_t h_event_end_lpf;
#endif