intrapred: specialize highbd 4x4 predictors
d207/d63/d45/d117/d135/d153: ~9-45% better, depending on the predictor, on 32-bit ARM; similar range on x86-64. This matches the non-highbitdepth implementation. BUG=webm:1316 Change-Id: Iddebdf7c58c6f31c47cae04da95c6e5318200e4c
This commit is contained in:
parent
e05f4cf8f4
commit
67cde46dd7
@ -690,6 +690,144 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vpx_highbd_d207_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
|
||||
const uint16_t *above,
|
||||
const uint16_t *left, int bd) {
|
||||
const int I = left[0];
|
||||
const int J = left[1];
|
||||
const int K = left[2];
|
||||
const int L = left[3];
|
||||
(void)above;
|
||||
(void)bd;
|
||||
DST(0, 0) = AVG2(I, J);
|
||||
DST(2, 0) = DST(0, 1) = AVG2(J, K);
|
||||
DST(2, 1) = DST(0, 2) = AVG2(K, L);
|
||||
DST(1, 0) = AVG3(I, J, K);
|
||||
DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
|
||||
DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
|
||||
DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
void vpx_highbd_d63_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
|
||||
const uint16_t *above, const uint16_t *left,
|
||||
int bd) {
|
||||
const int A = above[0];
|
||||
const int B = above[1];
|
||||
const int C = above[2];
|
||||
const int D = above[3];
|
||||
const int E = above[4];
|
||||
const int F = above[5];
|
||||
const int G = above[6];
|
||||
(void)left;
|
||||
(void)bd;
|
||||
DST(0, 0) = AVG2(A, B);
|
||||
DST(1, 0) = DST(0, 2) = AVG2(B, C);
|
||||
DST(2, 0) = DST(1, 2) = AVG2(C, D);
|
||||
DST(3, 0) = DST(2, 2) = AVG2(D, E);
|
||||
DST(3, 2) = AVG2(E, F); // differs from vp8
|
||||
|
||||
DST(0, 1) = AVG3(A, B, C);
|
||||
DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
|
||||
DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
|
||||
DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
|
||||
DST(3, 3) = AVG3(E, F, G); // differs from vp8
|
||||
}
|
||||
|
||||
void vpx_highbd_d45_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
|
||||
const uint16_t *above, const uint16_t *left,
|
||||
int bd) {
|
||||
const int A = above[0];
|
||||
const int B = above[1];
|
||||
const int C = above[2];
|
||||
const int D = above[3];
|
||||
const int E = above[4];
|
||||
const int F = above[5];
|
||||
const int G = above[6];
|
||||
const int H = above[7];
|
||||
(void)left;
|
||||
(void)bd;
|
||||
DST(0, 0) = AVG3(A, B, C);
|
||||
DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
|
||||
DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
|
||||
DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
|
||||
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
|
||||
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
|
||||
DST(3, 3) = H; // differs from vp8
|
||||
}
|
||||
|
||||
void vpx_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
|
||||
const uint16_t *above,
|
||||
const uint16_t *left, int bd) {
|
||||
const int I = left[0];
|
||||
const int J = left[1];
|
||||
const int K = left[2];
|
||||
const int X = above[-1];
|
||||
const int A = above[0];
|
||||
const int B = above[1];
|
||||
const int C = above[2];
|
||||
const int D = above[3];
|
||||
(void)bd;
|
||||
DST(0, 0) = DST(1, 2) = AVG2(X, A);
|
||||
DST(1, 0) = DST(2, 2) = AVG2(A, B);
|
||||
DST(2, 0) = DST(3, 2) = AVG2(B, C);
|
||||
DST(3, 0) = AVG2(C, D);
|
||||
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
DST(0, 2) = AVG3(J, I, X);
|
||||
DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
|
||||
DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
|
||||
DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
void vpx_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
|
||||
const uint16_t *above,
|
||||
const uint16_t *left, int bd) {
|
||||
const int I = left[0];
|
||||
const int J = left[1];
|
||||
const int K = left[2];
|
||||
const int L = left[3];
|
||||
const int X = above[-1];
|
||||
const int A = above[0];
|
||||
const int B = above[1];
|
||||
const int C = above[2];
|
||||
const int D = above[3];
|
||||
(void)bd;
|
||||
DST(0, 3) = AVG3(J, K, L);
|
||||
DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
|
||||
DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
|
||||
DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
|
||||
DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
|
||||
DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
|
||||
DST(3, 0) = AVG3(D, C, B);
|
||||
}
|
||||
|
||||
void vpx_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
|
||||
const uint16_t *above,
|
||||
const uint16_t *left, int bd) {
|
||||
const int I = left[0];
|
||||
const int J = left[1];
|
||||
const int K = left[2];
|
||||
const int L = left[3];
|
||||
const int X = above[-1];
|
||||
const int A = above[0];
|
||||
const int B = above[1];
|
||||
const int C = above[2];
|
||||
(void)bd;
|
||||
|
||||
DST(0, 0) = DST(2, 1) = AVG2(I, X);
|
||||
DST(0, 1) = DST(2, 2) = AVG2(J, I);
|
||||
DST(0, 2) = DST(2, 3) = AVG2(K, J);
|
||||
DST(0, 3) = AVG2(L, K);
|
||||
|
||||
DST(3, 0) = AVG3(A, B, C);
|
||||
DST(2, 0) = AVG3(X, A, B);
|
||||
DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
|
||||
DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
|
||||
DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// This serves as a wrapper function, so that all the prediction functions
|
||||
@ -725,7 +863,6 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
|
||||
intra_pred_sized(type, 8) \
|
||||
intra_pred_sized(type, 16) \
|
||||
intra_pred_sized(type, 32) \
|
||||
intra_pred_highbd_sized(type, 4) \
|
||||
intra_pred_highbd_sized(type, 8) \
|
||||
intra_pred_highbd_sized(type, 16) \
|
||||
intra_pred_highbd_sized(type, 32)
|
||||
|
Loading…
x
Reference in New Issue
Block a user