Use mov.16b instead of mov.8h

According to the ARM Architecture Reference Manual, the mov (vector)
instruction only accepts the arrangement specifiers '8b' and '16b'.
The Apple tools still accept the '8h' form, but assemble it to the same
encoding as the '16b' form. (When copying one vector register to another,
the element size of the vectors doesn't matter.)

This fixes building with GNU binutils.
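
As a quick illustration (not part of the patch; the register numbers are
arbitrary), the spellings defined by the ARM ARM are aliases of ORR, so the
copy moves all 128 bits regardless of how the elements are later interpreted:

    mov  v4.16b, v2.16b             // copy all 128 bits of v2 into v4
    orr  v4.16b, v2.16b, v2.16b     // equivalent encoding: mov (vector) is an alias of ORR
    // mov v4.8h, v2.8h             // not a defined arrangement; GNU binutils rejects it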
commit 86b3a999d5
parent 3a1cc63649
Author: Martin Storsjö
Date:   2014-07-08 09:29:23 +03:00


@@ -73,7 +73,7 @@
 cmgt $4.8h, $0.8h, #0 // if true, location of coef == 11111111
 bif $3.16b, $1.16b, $4.16b // if (x<0) reserved part; else keep 0 untouched
 shl $3.8h, $3.8h, #1
-mov.8h $6, $1
+mov.16b $6, $1
 sub $1.8h, $1.8h, $3.8h // if x > 0, -= 0; else x-= 2x
 // }
 .endm
@@ -315,7 +315,7 @@ shrn2 \arg1\().8h, \arg5\().4s, #16
 cmgt \arg4\().8h, \arg0\().8h, #0 // if true, location of coef == 11111111
 bif \arg3\().16b, \arg1\().16b, \arg4\().16b // if (x<0) reserved part; else keep 0 untouched
 shl \arg3\().8h, \arg3\().8h, #1
-mov \arg6\().8h, \arg1\().8h
+mov \arg6\().16b, \arg1\().16b
 sub \arg1\().8h, \arg1\().8h, \arg3\().8h // if x > 0, -= 0; else x-= 2x
 // }
 .endm
@@ -533,7 +533,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsQuant4x4_AArch64_neon
 ld1 {v2.8h}, [x1]
 ld1 {v0.8h, v1.8h}, [x0]
 ld1 {v3.8h}, [x2]
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS v0, v2, v3, v5, v6, v7
 st1 {v2.8h}, [x0], #16
 NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7
@@ -545,7 +545,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsQuant4x4Dc_AArch64_neon
 ld1 {v0.8h, v1.8h}, [x0]
 dup v2.8h, w1 // even ff range [0, 768]
 dup v3.8h, w2
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS v0, v2, v3, v5, v6, v7
 st1 {v2.8h}, [x0], #16
 NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7
@@ -559,10 +559,10 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsQuantFour4x4_AArch64_neon
 .rept 4
 ld1 {v0.8h, v1.8h}, [x0], #32
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS v0, v4, v3, v5, v6, v7
 st1 {v4.8h}, [x1], #16
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7
 st1 {v4.8h}, [x1], #16
 .endr
@@ -575,36 +575,36 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsQuantFour4x4Max_AArch64_neon
 mov x1, x0
 ld1 {v0.8h, v1.8h}, [x0], #32
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v16
 st1 {v4.8h}, [x1], #16
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v17
 st1 {v4.8h}, [x1], #16 // then 1st 16 elem in v16 & v17
 ld1 {v0.8h, v1.8h}, [x0], #32
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v18
 st1 {v4.8h}, [x1], #16
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v19
 st1 {v4.8h}, [x1], #16 // then 2st 16 elem in v18 & v19
 SELECT_MAX_IN_ABS_COEF v16, v17, v18, v19, h20, h21
 ld1 {v0.8h, v1.8h}, [x0], #32
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v16
 st1 {v4.8h}, [x1], #16
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v17
 st1 {v4.8h}, [x1], #16 // then 1st 16 elem in v16 & v17
 ld1 {v0.8h, v1.8h}, [x0], #32
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v18
 st1 {v4.8h}, [x1], #16
-mov.8h v4, v2
+mov.16b v4, v2
 NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v19
 st1 {v4.8h}, [x1], #16 // then 2st 16 elem in v18 & v19
@@ -944,4 +944,4 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsIDctRecI16x16Dc_AArch64_neon
 st1 {v3.16b}, [x0], x1
 .endr
 WELS_ASM_AARCH64_FUNC_END
 #endif