Up to 6% faster encoding with the clang compiler

The speed-up comes mostly from revamping the main loop of GetResidualCost() and avoiding some branches.

Change-Id: Ib05763e18a6bf46c82dc3d5d1d8eb65e99474207
This commit is contained in:
Pascal Massimino 2013-09-05 10:36:39 -07:00
parent 93402f02db
commit 42542be855

View File

@@ -292,31 +292,20 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
if (res->last < 0) {
return VP8BitCost(0, p0);
}
cost = 0;
while (n < res->last) {
int v = res->coeffs[n];
cost = VP8BitCost(1, p0);
for (; n < res->last; ++n) {
const int v = abs(res->coeffs[n]);
const int b = VP8EncBands[n + 1];
++n;
if (v == 0) {
// short-case for VP8LevelCost(t, 0) (note: VP8LevelFixedCosts[0] == 0):
cost += t[0];
t = res->cost[b][0];
continue;
}
v = abs(v);
cost += VP8BitCost(1, p0);
const int ctx = (v >= 2) ? 2 : v;
cost += VP8LevelCost(t, v);
{
const int ctx = (v == 1) ? 1 : 2;
p0 = res->prob[b][ctx][0];
t = res->cost[b][ctx];
}
t = res->cost[b][ctx];
// the masking trick is faster than "if (v) cost += ..." with clang
cost += (v ? ~0U : 0) & VP8BitCost(1, res->prob[b][ctx][0]);
}
// Last coefficient is always non-zero
{
const int v = abs(res->coeffs[n]);
assert(v != 0);
cost += VP8BitCost(1, p0);
cost += VP8LevelCost(t, v);
if (n < 15) {
const int b = VP8EncBands[n + 1];