Prefer modes in the same order as the reference, in the aarch64 combined intra satd/sad functions

This fixes encoding tests on aarch64.
This commit is contained in:
Martin Storsjö 2014-06-27 23:43:08 +03:00
parent 894f073e4b
commit cf6ae23413

View File

@@ -97,12 +97,20 @@
#ifdef __APPLE__ #ifdef __APPLE__
// SELECT_BEST_COST -- __APPLE__ branch, positional macro arguments.
//
// NOTE: this is a side-by-side diff rendering. On every line the OLD text
// comes first and the NEW text second; the final .endm exists only in the
// new version.
//   old: conditions are hard-coded (hs for the csel lines, lo for the cset)
//   new: $1 is the condition used by cset, $2 the condition used by csel
//
// Operands:
//   $0 - register holding the first candidate cost; receives the selected cost
//   w1 - second candidate cost, w2 - third candidate cost
// Result:
//   w7 = 0, 1 or 2 depending on whether $0, w1 or w2 was selected.
// Each candidate is compared against the currently selected cost with cmp;
// csel keeps the current cost when condition $2 holds and takes the candidate
// otherwise. cset writes 1 into w7 when $1 holds after the first compare, so
// the callers visible in this diff pass complementary pairs (ls/hi, lo/hs).
// Clobbers w6 (loaded with the constant 2) and the condition flags.
.macro SELECT_BEST_COST .macro SELECT_BEST_COST
cmp w1, $0 cmp w1, $0
csel $0, $0, w1, hs csel $0, $0, w1, $2
cset w7, lo cset w7, $1
cmp w2, $0 cmp w2, $0
mov w6, #2 mov w6, #2
csel $0, $0, w2, hs csel $0, $0, w2, $2
csel w7, w7, w6, hs csel w7, w7, w6, $2
.endm
// Tie-breaking wrappers around SELECT_BEST_COST for the __APPLE__ branch.
// They use the positional $0 argument form, matching SELECT_BEST_COST in
// this branch, instead of a named parameter (which is the syntax of the
// #else branch).
//
// SELECT_BEST_COST_PREFER_HIGHER: passes (ls, hi). A candidate replaces the
// current cost when it is lower or equal (unsigned), so on equal costs the
// later candidate, i.e. the higher value in w7, is selected.
.macro SELECT_BEST_COST_PREFER_HIGHER
    SELECT_BEST_COST $0, ls, hi
.endm
// SELECT_BEST_COST_PREFER_LOWER: passes (lo, hs). A candidate replaces the
// current cost only when it is strictly lower (unsigned), so on equal costs
// the earlier candidate, i.e. the lower value in w7, is kept.
// (The closing line below is shown twice because the side-by-side diff
// renders the old and the new column on the same line.)
.macro SELECT_BEST_COST_PREFER_LOWER
    SELECT_BEST_COST $0, lo, hs
.endm .endm
.macro LOAD_CHROMA_DATA .macro LOAD_CHROMA_DATA
@@ -173,14 +181,22 @@
add $7, $7, v4.4s add $7, $7, v4.4s
.endm .endm
#else #else
// SELECT_BEST_COST -- non-Apple (#else) branch, named macro arguments.
//
// NOTE: this is a side-by-side diff rendering. On every line the OLD text
// comes first and the NEW text second; the final .endm exists only in the
// new version.
//   old: one argument (arg0); conditions hard-coded (hs for csel, lo for cset)
//   new: arg1 is the condition used by cset, arg2 the condition used by csel
//
// Operands:
//   arg0 - register holding the first candidate cost; receives the selected cost
//   w1   - second candidate cost, w2 - third candidate cost
// Result:
//   w7 = 0, 1 or 2 depending on whether arg0, w1 or w2 was selected.
// Each candidate is compared against the currently selected cost with cmp;
// csel keeps the current cost when condition arg2 holds and takes the
// candidate otherwise. cset writes 1 into w7 when arg1 holds after the first
// compare, so the callers visible in this diff pass complementary pairs
// (ls/hi, lo/hs).
// Clobbers w6 (loaded with the constant 2) and the condition flags.
.macro SELECT_BEST_COST arg0 .macro SELECT_BEST_COST arg0, arg1, arg2
cmp w1, \arg0 cmp w1, \arg0
csel \arg0, \arg0, w1, hs csel \arg0, \arg0, w1, \arg2
cset w7, lo cset w7, \arg1
cmp w2, \arg0 cmp w2, \arg0
mov w6, #2 mov w6, #2
csel \arg0, \arg0, w2, hs csel \arg0, \arg0, w2, \arg2
csel w7, w7, w6, hs csel w7, w7, w6, \arg2
.endm
// Tie-breaking wrappers around SELECT_BEST_COST (non-Apple branch).
//
// SELECT_BEST_COST_PREFER_HIGHER: passes (ls, hi). A candidate replaces the
// current cost when it is lower or equal (unsigned), so on equal costs the
// later candidate, i.e. the higher value in w7, is selected.
.macro SELECT_BEST_COST_PREFER_HIGHER arg0
SELECT_BEST_COST \arg0, ls, hi
.endm
// SELECT_BEST_COST_PREFER_LOWER: passes (lo, hs). A candidate replaces the
// current cost only when it is strictly lower (unsigned), so on equal costs
// the earlier candidate, i.e. the lower value in w7, is kept.
// (The closing line below is shown twice because the side-by-side diff
// renders the old and the new column on the same line.)
.macro SELECT_BEST_COST_PREFER_LOWER arg0
SELECT_BEST_COST \arg0, lo, hs
.endm .endm
.macro LOAD_CHROMA_DATA arg0, arg1, arg2 .macro LOAD_CHROMA_DATA arg0, arg1, arg2
@@ -347,7 +363,7 @@ WELS_ASM_ARCH64_FUNC_BEGIN WelsIntra8x8Combined3Sad_AArch64_neon
saddlv s31, v31.8h saddlv s31, v31.8h
fmov w0, s31 fmov w0, s31
SELECT_BEST_COST w0 SELECT_BEST_COST_PREFER_HIGHER w0
str w7, [x4] str w7, [x4]
WELS_ASM_ARCH64_FUNC_END WELS_ASM_ARCH64_FUNC_END
@@ -399,7 +415,7 @@ sad_intra_16x16_x3_opt_loop0:
fmov w2, s31 fmov w2, s31
add w2, w2, w5, lsl #1 add w2, w2, w5, lsl #1
SELECT_BEST_COST w0 SELECT_BEST_COST_PREFER_LOWER w0
str w7, [x4] str w7, [x4]
WELS_ASM_ARCH64_FUNC_END WELS_ASM_ARCH64_FUNC_END
@@ -464,7 +480,7 @@ WELS_ASM_ARCH64_FUNC_BEGIN WelsIntra4x4Combined3Satd_AArch64_neon
add w2, w2, w6 add w2, w2, w6
mov w10, w0 mov w10, w0
SELECT_BEST_COST w10 SELECT_BEST_COST_PREFER_HIGHER w10
str w7, [x5] str w7, [x5]
@@ -579,7 +595,7 @@ WELS_ASM_ARCH64_FUNC_BEGIN WelsIntra8x8Combined3Satd_AArch64_neon
addv s31, v31.4s addv s31, v31.4s
fmov w0, s31 fmov w0, s31
SELECT_BEST_COST w0 SELECT_BEST_COST_PREFER_HIGHER w0
str w7, [x4] str w7, [x4]
WELS_ASM_ARCH64_FUNC_END WELS_ASM_ARCH64_FUNC_END
@@ -656,7 +672,7 @@ WELS_ASM_ARCH64_FUNC_BEGIN WelsIntra16x16Combined3Satd_AArch64_neon
fmov w2, s31 fmov w2, s31
add w2, w2, w5, lsl #1 add w2, w2, w5, lsl #1
SELECT_BEST_COST w0 SELECT_BEST_COST_PREFER_LOWER w0
str w7, [x4] str w7, [x4]