intrapred: specialize highbd 4x4 predictors

d207/d63/d45/d117/d135/d153

~9-45% better depending on the predictor on 32-bit ARM, similar range on
x86-64

this matches the non-highbitdepth implementation

BUG=webm:1316

Change-Id: Iddebdf7c58c6f31c47cae04da95c6e5318200e4c
This commit is contained in:
James Zern 2017-03-24 19:24:39 -07:00
parent e05f4cf8f4
commit 67cde46dd7

View File

@ -690,6 +690,144 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
dst += stride;
}
}
void vpx_highbd_d207_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
const int I = left[0];
const int J = left[1];
const int K = left[2];
const int L = left[3];
(void)above;
(void)bd;
DST(0, 0) = AVG2(I, J);
DST(2, 0) = DST(0, 1) = AVG2(J, K);
DST(2, 1) = DST(0, 2) = AVG2(K, L);
DST(1, 0) = AVG3(I, J, K);
DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
void vpx_highbd_d63_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
const int A = above[0];
const int B = above[1];
const int C = above[2];
const int D = above[3];
const int E = above[4];
const int F = above[5];
const int G = above[6];
(void)left;
(void)bd;
DST(0, 0) = AVG2(A, B);
DST(1, 0) = DST(0, 2) = AVG2(B, C);
DST(2, 0) = DST(1, 2) = AVG2(C, D);
DST(3, 0) = DST(2, 2) = AVG2(D, E);
DST(3, 2) = AVG2(E, F); // differs from vp8
DST(0, 1) = AVG3(A, B, C);
DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
DST(3, 3) = AVG3(E, F, G); // differs from vp8
}
void vpx_highbd_d45_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd) {
const int A = above[0];
const int B = above[1];
const int C = above[2];
const int D = above[3];
const int E = above[4];
const int F = above[5];
const int G = above[6];
const int H = above[7];
(void)left;
(void)bd;
DST(0, 0) = AVG3(A, B, C);
DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = H; // differs from vp8
}
void vpx_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
const int I = left[0];
const int J = left[1];
const int K = left[2];
const int X = above[-1];
const int A = above[0];
const int B = above[1];
const int C = above[2];
const int D = above[3];
(void)bd;
DST(0, 0) = DST(1, 2) = AVG2(X, A);
DST(1, 0) = DST(2, 2) = AVG2(A, B);
DST(2, 0) = DST(3, 2) = AVG2(B, C);
DST(3, 0) = AVG2(C, D);
DST(0, 3) = AVG3(K, J, I);
DST(0, 2) = AVG3(J, I, X);
DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
DST(3, 1) = AVG3(B, C, D);
}
void vpx_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
const int I = left[0];
const int J = left[1];
const int K = left[2];
const int L = left[3];
const int X = above[-1];
const int A = above[0];
const int B = above[1];
const int C = above[2];
const int D = above[3];
(void)bd;
DST(0, 3) = AVG3(J, K, L);
DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
DST(3, 0) = AVG3(D, C, B);
}
void vpx_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above,
const uint16_t *left, int bd) {
const int I = left[0];
const int J = left[1];
const int K = left[2];
const int L = left[3];
const int X = above[-1];
const int A = above[0];
const int B = above[1];
const int C = above[2];
(void)bd;
DST(0, 0) = DST(2, 1) = AVG2(I, X);
DST(0, 1) = DST(2, 2) = AVG2(J, I);
DST(0, 2) = DST(2, 3) = AVG2(K, J);
DST(0, 3) = AVG2(L, K);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
DST(1, 3) = AVG3(L, K, J);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
// This serves as a wrapper function, so that all the prediction functions
@ -725,7 +863,6 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32) \
intra_pred_highbd_sized(type, 4) \
intra_pred_highbd_sized(type, 8) \
intra_pred_highbd_sized(type, 16) \
intra_pred_highbd_sized(type, 32)