From 7481edb33f5ea82e29f12e0e872d04ea1a5373cb Mon Sep 17 00:00:00 2001 From: James Zern Date: Tue, 27 Sep 2016 19:43:03 -0700 Subject: [PATCH] vpx_dsp/get_prob: make clip_prob branchless + inline the function directly as there was only one consumer (get_prob()). This is an attempt to reduce the number of branches to work around an AMD bug. This change is mildly faster or neutral across x86-64, ARM. http://support.amd.com/TechDocs/44739_12h_Rev_Gd.pdf 665 Integer Divide Instruction May Cause Unpredictable Behavior BUG=chromium:639712 Suggested-by: Pascal Massimino Change-Id: Ia91823aded79aab469dd68095d44300e8df04ed2 --- vpx_dsp/prob.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vpx_dsp/prob.h b/vpx_dsp/prob.h index 3127a00bb..4402cd30a 100644 --- a/vpx_dsp/prob.h +++ b/vpx_dsp/prob.h @@ -43,13 +43,14 @@ typedef int8_t vpx_tree_index; typedef const vpx_tree_index vpx_tree[]; -static INLINE vpx_prob clip_prob(int p) { - return (p > 255) ? 255 : (p < 1) ? 1 : p; -} - static INLINE vpx_prob get_prob(unsigned int num, unsigned int den) { if (den == 0) return 128u; - return clip_prob((int)(((int64_t)num * 256 + (den >> 1)) / den)); + { + const int p = (int)(((int64_t)num * 256 + (den >> 1)) / den); + // (p > 255) ? 255 : (p < 1) ? 1 : p; + const int clipped_prob = p | ((255 - p) >> 23) | (p == 0); + return (vpx_prob)clipped_prob; + } } static INLINE vpx_prob get_binary_prob(unsigned int n0, unsigned int n1) {