ARM: use standard syntax for all LDRD/STRD instructions
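The standard syntax requires both transfer registers to be named explicitly in LDRD/STRD instructions (two destination registers for LDRD, two source registers for STRD). Some versions of the GNU assembler accept a single register and treat the second as implicit; others are stricter and reject that form.

Signed-off-by: Mans Rullgard <mans@mansr.com>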
This commit is contained in:
parent
b6a3849adb
commit
998170913c
@ -24,7 +24,7 @@
|
|||||||
.macro h264_chroma_mc8 type, codec=h264
|
.macro h264_chroma_mc8 type, codec=h264
|
||||||
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||||
push {r4-r7, lr}
|
push {r4-r7, lr}
|
||||||
ldrd r4, [sp, #20]
|
ldrd r4, r5, [sp, #20]
|
||||||
.ifc \type,avg
|
.ifc \type,avg
|
||||||
mov lr, r0
|
mov lr, r0
|
||||||
.endif
|
.endif
|
||||||
@ -182,7 +182,7 @@ endfunc
|
|||||||
.macro h264_chroma_mc4 type, codec=h264
|
.macro h264_chroma_mc4 type, codec=h264
|
||||||
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||||
push {r4-r7, lr}
|
push {r4-r7, lr}
|
||||||
ldrd r4, [sp, #20]
|
ldrd r4, r5, [sp, #20]
|
||||||
.ifc \type,avg
|
.ifc \type,avg
|
||||||
mov lr, r0
|
mov lr, r0
|
||||||
.endif
|
.endif
|
||||||
|
@ -886,7 +886,7 @@ T mov sp, r0
|
|||||||
mov r12, #8
|
mov r12, #8
|
||||||
vpush {d8-d15}
|
vpush {d8-d15}
|
||||||
bl put_h264_qpel8_h_lowpass_neon
|
bl put_h264_qpel8_h_lowpass_neon
|
||||||
ldrd r0, [r11], #8
|
ldrd r0, r1, [r11], #8
|
||||||
mov r3, r2
|
mov r3, r2
|
||||||
add r12, sp, #64
|
add r12, sp, #64
|
||||||
sub r1, r1, r2, lsl #1
|
sub r1, r1, r2, lsl #1
|
||||||
@ -913,7 +913,7 @@ T mov sp, r0
|
|||||||
vpush {d8-d15}
|
vpush {d8-d15}
|
||||||
bl put_h264_qpel8_h_lowpass_neon
|
bl put_h264_qpel8_h_lowpass_neon
|
||||||
mov r4, r0
|
mov r4, r0
|
||||||
ldrd r0, [r11], #8
|
ldrd r0, r1, [r11], #8
|
||||||
sub r1, r1, r2, lsl #1
|
sub r1, r1, r2, lsl #1
|
||||||
sub r1, r1, #2
|
sub r1, r1, #2
|
||||||
mov r3, r2
|
mov r3, r2
|
||||||
@ -958,7 +958,7 @@ T mov sp, r0
|
|||||||
vpush {d8-d15}
|
vpush {d8-d15}
|
||||||
bl put_h264_qpel8_v_lowpass_neon
|
bl put_h264_qpel8_v_lowpass_neon
|
||||||
mov r4, r0
|
mov r4, r0
|
||||||
ldrd r0, [r11], #8
|
ldrd r0, r1, [r11], #8
|
||||||
sub r1, r1, r3, lsl #1
|
sub r1, r1, r3, lsl #1
|
||||||
sub r1, r1, #2
|
sub r1, r1, #2
|
||||||
sub r2, r4, #64
|
sub r2, r4, #64
|
||||||
@ -1071,7 +1071,7 @@ T mov sp, r0
|
|||||||
mov r3, #16
|
mov r3, #16
|
||||||
vpush {d8-d15}
|
vpush {d8-d15}
|
||||||
bl put_h264_qpel16_h_lowpass_neon
|
bl put_h264_qpel16_h_lowpass_neon
|
||||||
ldrd r0, [r11], #8
|
ldrd r0, r1, [r11], #8
|
||||||
mov r3, r2
|
mov r3, r2
|
||||||
add r12, sp, #64
|
add r12, sp, #64
|
||||||
sub r1, r1, r2, lsl #1
|
sub r1, r1, r2, lsl #1
|
||||||
@ -1096,7 +1096,7 @@ T mov sp, r0
|
|||||||
vpush {d8-d15}
|
vpush {d8-d15}
|
||||||
bl put_h264_qpel16_h_lowpass_neon_packed
|
bl put_h264_qpel16_h_lowpass_neon_packed
|
||||||
mov r4, r0
|
mov r4, r0
|
||||||
ldrd r0, [r11], #8
|
ldrd r0, r1, [r11], #8
|
||||||
sub r1, r1, r2, lsl #1
|
sub r1, r1, r2, lsl #1
|
||||||
sub r1, r1, #2
|
sub r1, r1, #2
|
||||||
mov r3, r2
|
mov r3, r2
|
||||||
@ -1139,7 +1139,7 @@ T mov sp, r0
|
|||||||
vpush {d8-d15}
|
vpush {d8-d15}
|
||||||
bl put_h264_qpel16_v_lowpass_neon_packed
|
bl put_h264_qpel16_v_lowpass_neon_packed
|
||||||
mov r4, r0
|
mov r4, r0
|
||||||
ldrd r0, [r11], #8
|
ldrd r0, r1, [r11], #8
|
||||||
sub r1, r1, r3, lsl #1
|
sub r1, r1, r3, lsl #1
|
||||||
sub r1, r1, #2
|
sub r1, r1, #2
|
||||||
mov r2, r3
|
mov r2, r3
|
||||||
|
@ -61,9 +61,9 @@ function ff_dct_unquantize_h263_armv5te, export=1
|
|||||||
mov ip, #0
|
mov ip, #0
|
||||||
subs r3, r3, #2
|
subs r3, r3, #2
|
||||||
ble 2f
|
ble 2f
|
||||||
ldrd r4, [r0, #0]
|
ldrd r4, r5, [r0, #0]
|
||||||
1:
|
1:
|
||||||
ldrd r6, [r0, #8]
|
ldrd r6, r7, [r0, #8]
|
||||||
|
|
||||||
dequant_t r9, r4, r1, r2, r9
|
dequant_t r9, r4, r1, r2, r9
|
||||||
dequant_t lr, r5, r1, r2, lr
|
dequant_t lr, r5, r1, r2, lr
|
||||||
@ -87,7 +87,7 @@ function ff_dct_unquantize_h263_armv5te, export=1
|
|||||||
|
|
||||||
subs r3, r3, #8
|
subs r3, r3, #8
|
||||||
it gt
|
it gt
|
||||||
ldrdgt r4, [r0, #0] /* load data early to avoid load/use pipeline stall */
|
ldrdgt r4, r5, [r0, #0] /* load data early to avoid load/use pipeline stall */
|
||||||
bgt 1b
|
bgt 1b
|
||||||
|
|
||||||
adds r3, r3, #2
|
adds r3, r3, #2
|
||||||
|
@ -46,8 +46,8 @@ w57: .long W57
|
|||||||
function idct_row_armv5te
|
function idct_row_armv5te
|
||||||
str lr, [sp, #-4]!
|
str lr, [sp, #-4]!
|
||||||
|
|
||||||
ldrd v1, [a1, #8]
|
ldrd v1, v2, [a1, #8]
|
||||||
ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */
|
ldrd a3, a4, [a1] /* a3 = row[1:0], a4 = row[3:2] */
|
||||||
orrs v1, v1, v2
|
orrs v1, v1, v2
|
||||||
itt eq
|
itt eq
|
||||||
cmpeq v1, a4
|
cmpeq v1, a4
|
||||||
@ -78,7 +78,7 @@ function idct_row_armv5te
|
|||||||
smultt fp, lr, a3
|
smultt fp, lr, a3
|
||||||
sub v7, v7, a2
|
sub v7, v7, a2
|
||||||
smulbt a2, lr, a4
|
smulbt a2, lr, a4
|
||||||
ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
|
ldrd a3, a4, [a1, #8] /* a3=row[5:4] a4=row[7:6] */
|
||||||
sub fp, fp, a2
|
sub fp, fp, a2
|
||||||
|
|
||||||
orrs a2, a3, a4
|
orrs a2, a3, a4
|
||||||
@ -121,7 +121,7 @@ function idct_row_armv5te
|
|||||||
add a2, v4, fp
|
add a2, v4, fp
|
||||||
mov a2, a2, lsr #11
|
mov a2, a2, lsr #11
|
||||||
add a4, a4, a2, lsl #16
|
add a4, a4, a2, lsl #16
|
||||||
strd a3, [a1]
|
strd a3, a4, [a1]
|
||||||
|
|
||||||
sub a2, v4, fp
|
sub a2, v4, fp
|
||||||
mov a3, a2, lsr #11
|
mov a3, a2, lsr #11
|
||||||
@ -135,7 +135,7 @@ function idct_row_armv5te
|
|||||||
sub a2, v1, v5
|
sub a2, v1, v5
|
||||||
mov a2, a2, lsr #11
|
mov a2, a2, lsr #11
|
||||||
add a4, a4, a2, lsl #16
|
add a4, a4, a2, lsl #16
|
||||||
strd a3, [a1, #8]
|
strd a3, a4, [a1, #8]
|
||||||
|
|
||||||
ldr pc, [sp], #4
|
ldr pc, [sp], #4
|
||||||
|
|
||||||
@ -144,8 +144,8 @@ row_dc_only:
|
|||||||
bic a3, a3, #0xe000
|
bic a3, a3, #0xe000
|
||||||
mov a3, a3, lsl #3
|
mov a3, a3, lsl #3
|
||||||
mov a4, a3
|
mov a4, a3
|
||||||
strd a3, [a1]
|
strd a3, a4, [a1]
|
||||||
strd a3, [a1, #8]
|
strd a3, a4, [a1, #8]
|
||||||
|
|
||||||
ldr pc, [sp], #4
|
ldr pc, [sp], #4
|
||||||
endfunc
|
endfunc
|
||||||
|
@ -159,8 +159,8 @@ function idct_col4_neon
|
|||||||
vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
|
vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
|
||||||
vld1.64 {d8}, [r2,:64], ip /* d5 = col[3] */
|
vld1.64 {d8}, [r2,:64], ip /* d5 = col[3] */
|
||||||
|
|
||||||
ldrd r4, [r2]
|
ldrd r4, r5, [r2]
|
||||||
ldrd r6, [r2, #16]
|
ldrd r6, r7, [r2, #16]
|
||||||
orrs r4, r4, r5
|
orrs r4, r4, r5
|
||||||
|
|
||||||
idct_col4_top
|
idct_col4_top
|
||||||
@ -176,7 +176,7 @@ function idct_col4_neon
|
|||||||
vadd.i32 q14, q14, q7
|
vadd.i32 q14, q14, q7
|
||||||
|
|
||||||
1: orrs r6, r6, r7
|
1: orrs r6, r6, r7
|
||||||
ldrd r4, [r2, #16]
|
ldrd r4, r5, [r2, #16]
|
||||||
it eq
|
it eq
|
||||||
addeq r2, r2, #16
|
addeq r2, r2, #16
|
||||||
beq 2f
|
beq 2f
|
||||||
@ -188,7 +188,7 @@ function idct_col4_neon
|
|||||||
vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
|
vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
|
||||||
|
|
||||||
2: orrs r4, r4, r5
|
2: orrs r4, r4, r5
|
||||||
ldrd r4, [r2, #16]
|
ldrd r4, r5, [r2, #16]
|
||||||
it eq
|
it eq
|
||||||
addeq r2, r2, #16
|
addeq r2, r2, #16
|
||||||
beq 3f
|
beq 3f
|
||||||
|
Loading…
x
Reference in New Issue
Block a user