intrapred/d135: flatten border results before storing
the results along the top and left border are then stored with a moving window into the vector. ~40-67% faster on ARM, ~40-77+% on x86 depending on the block size. Change-Id: Iab369aa2946a3ae4eb7290d512868fe5db92dbc8
This commit is contained in:
@@ -158,20 +158,29 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
|||||||
|
|
||||||
static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||||
const uint8_t *above, const uint8_t *left) {
|
const uint8_t *above, const uint8_t *left) {
|
||||||
int r, c;
|
int i;
|
||||||
dst[0] = AVG3(left[0], above[-1], above[0]);
|
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
|
||||||
for (c = 1; c < bs; c++)
|
// silence a spurious -Warray-bounds warning, possibly related to:
|
||||||
dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
|
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
|
||||||
|
uint8_t border[69];
|
||||||
|
#else
|
||||||
|
uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right
|
||||||
|
#endif
|
||||||
|
|
||||||
dst[stride] = AVG3(above[-1], left[0], left[1]);
|
// dst(bs, bs - 2)[0], i.e., border starting at bottom-left
|
||||||
for (r = 2; r < bs; ++r)
|
for (i = 0; i < bs - 2; ++i) {
|
||||||
dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
|
border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
|
||||||
|
}
|
||||||
|
border[bs - 2] = AVG3(above[-1], left[0], left[1]);
|
||||||
|
border[bs - 1] = AVG3(left[0], above[-1], above[0]);
|
||||||
|
border[bs - 0] = AVG3(above[-1], above[0], above[1]);
|
||||||
|
// dst[0][2, size), i.e., remaining top border ascending
|
||||||
|
for (i = 0; i < bs - 2; ++i) {
|
||||||
|
border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]);
|
||||||
|
}
|
||||||
|
|
||||||
dst += stride;
|
for (i = 0; i < bs; ++i) {
|
||||||
for (r = 1; r < bs; ++r) {
|
memcpy(dst + i * stride, border + bs - 1 - i, bs);
|
||||||
for (c = 1; c < bs; c++)
|
|
||||||
dst[c] = dst[-stride + c - 1];
|
|
||||||
dst += stride;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user