Merge "Fix: NEON copy/extend frame for small sizes"
This commit is contained in:
@@ -18,7 +18,8 @@
|
|||||||
|
|
||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
|
|
||||||
;void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
|
;void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc,
|
||||||
|
; YV12_BUFFER_CONFIG *dst_ybc);
|
||||||
|
|
||||||
|vp8_yv12_copy_frame_func_neon| PROC
|
|vp8_yv12_copy_frame_func_neon| PROC
|
||||||
push {r4 - r11, lr}
|
push {r4 - r11, lr}
|
||||||
@@ -52,7 +53,8 @@ cp_src_to_dst_height_loop
|
|||||||
mov r9, r3
|
mov r9, r3
|
||||||
add r10, r2, r6
|
add r10, r2, r6
|
||||||
add r11, r3, r7
|
add r11, r3, r7
|
||||||
mov r12, r5, lsr #7
|
movs r12, r5, lsr #7
|
||||||
|
ble extra_cp_needed ; y_width < 128
|
||||||
|
|
||||||
cp_src_to_dst_width_loop
|
cp_src_to_dst_width_loop
|
||||||
vld1.8 {q0, q1}, [r8]!
|
vld1.8 {q0, q1}, [r8]!
|
||||||
@@ -83,6 +85,7 @@ cp_src_to_dst_width_loop
|
|||||||
|
|
||||||
bne cp_src_to_dst_height_loop
|
bne cp_src_to_dst_height_loop
|
||||||
|
|
||||||
|
extra_cp_needed
|
||||||
ands r10, r5, #0x7f ;check to see if extra copy is needed
|
ands r10, r5, #0x7f ;check to see if extra copy is needed
|
||||||
sub r11, r5, r10
|
sub r11, r5, r10
|
||||||
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
|
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
|
||||||
@@ -110,7 +113,8 @@ cp_src_to_dst_height_uv_loop
|
|||||||
mov r9, r3
|
mov r9, r3
|
||||||
add r10, r2, r6
|
add r10, r2, r6
|
||||||
add r11, r3, r7
|
add r11, r3, r7
|
||||||
mov r12, r5, lsr #6
|
movs r12, r5, lsr #6
|
||||||
|
ble extra_uv_cp_needed
|
||||||
|
|
||||||
cp_src_to_dst_width_uv_loop
|
cp_src_to_dst_width_uv_loop
|
||||||
vld1.8 {q0, q1}, [r8]!
|
vld1.8 {q0, q1}, [r8]!
|
||||||
@@ -133,6 +137,7 @@ cp_src_to_dst_width_uv_loop
|
|||||||
|
|
||||||
bne cp_src_to_dst_height_uv_loop
|
bne cp_src_to_dst_height_uv_loop
|
||||||
|
|
||||||
|
extra_uv_cp_needed
|
||||||
ands r10, r5, #0x3f ;check to see if extra copy is needed
|
ands r10, r5, #0x3f ;check to see if extra copy is needed
|
||||||
sub r11, r5, r10
|
sub r11, r5, r10
|
||||||
ldr r2, [sp] ;srcptr1
|
ldr r2, [sp] ;srcptr1
|
||||||
|
@@ -42,7 +42,8 @@ cp_src_to_dst_height_loop
|
|||||||
mov r9, r3
|
mov r9, r3
|
||||||
add r10, r2, r6
|
add r10, r2, r6
|
||||||
add r11, r3, r7
|
add r11, r3, r7
|
||||||
mov r12, r5, lsr #7
|
movs r12, r5, lsr #7
|
||||||
|
ble extra_cp_needed ; y_width < 128
|
||||||
|
|
||||||
cp_src_to_dst_width_loop
|
cp_src_to_dst_width_loop
|
||||||
vld1.8 {q0, q1}, [r8]!
|
vld1.8 {q0, q1}, [r8]!
|
||||||
@@ -73,6 +74,7 @@ cp_src_to_dst_width_loop
|
|||||||
|
|
||||||
bne cp_src_to_dst_height_loop
|
bne cp_src_to_dst_height_loop
|
||||||
|
|
||||||
|
extra_cp_needed
|
||||||
ands r10, r5, #0x7f ;check to see if extra copy is needed
|
ands r10, r5, #0x7f ;check to see if extra copy is needed
|
||||||
sub r11, r5, r10
|
sub r11, r5, r10
|
||||||
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
|
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
|
||||||
@@ -419,7 +421,8 @@ cp_src_to_dst_height_loop1
|
|||||||
mov r9, r3
|
mov r9, r3
|
||||||
add r10, r2, r6
|
add r10, r2, r6
|
||||||
add r11, r3, r7
|
add r11, r3, r7
|
||||||
mov r12, r5, lsr #7
|
movs r12, r5, lsr #7
|
||||||
|
ble extra_copy_needed ; y_width < 128
|
||||||
|
|
||||||
cp_src_to_dst_width_loop1
|
cp_src_to_dst_width_loop1
|
||||||
vld1.8 {q0, q1}, [r8]!
|
vld1.8 {q0, q1}, [r8]!
|
||||||
@@ -450,6 +453,7 @@ cp_src_to_dst_width_loop1
|
|||||||
|
|
||||||
bne cp_src_to_dst_height_loop1
|
bne cp_src_to_dst_height_loop1
|
||||||
|
|
||||||
|
extra_copy_needed
|
||||||
ands r10, r5, #0x7f ;check to see if extra copy is needed
|
ands r10, r5, #0x7f ;check to see if extra copy is needed
|
||||||
sub r11, r5, r10
|
sub r11, r5, r10
|
||||||
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
|
ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
|
||||||
|
@@ -75,12 +75,13 @@ copy_left_right_y
|
|||||||
mul r8, r4, lr ; plane_height * plane_stride
|
mul r8, r4, lr ; plane_height * plane_stride
|
||||||
|
|
||||||
; copy width is plane_stride
|
; copy width is plane_stride
|
||||||
mov r12, lr, lsr #7 ; plane_stride / 128
|
movs r12, lr, lsr #7 ; plane_stride / 128
|
||||||
|
|
||||||
sub r1, r1, #32 ; src_ptr1 = y_buffer - Border
|
sub r1, r1, #32 ; src_ptr1 = y_buffer - Border
|
||||||
add r6, r1, r8 ; dest_ptr2 = src_ptr2 - plane_stride (src_ptr1 + (plane_height * plane_stride))
|
add r6, r1, r8 ; dest_ptr2 = src_ptr2 - plane_stride (src_ptr1 + (plane_height * plane_stride))
|
||||||
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
|
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
|
||||||
sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
|
sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
|
||||||
|
ble extra_y_copy_needed ; plane stride < 128
|
||||||
|
|
||||||
copy_top_bottom_y
|
copy_top_bottom_y
|
||||||
vld1.8 {q0, q1}, [r1]!
|
vld1.8 {q0, q1}, [r1]!
|
||||||
@@ -119,6 +120,7 @@ top_bottom_32
|
|||||||
subs r12, r12, #1
|
subs r12, r12, #1
|
||||||
bne copy_top_bottom_y
|
bne copy_top_bottom_y
|
||||||
|
|
||||||
|
extra_y_copy_needed
|
||||||
mov r7, lr, lsr #4 ; check to see if extra copy is needed
|
mov r7, lr, lsr #4 ; check to see if extra copy is needed
|
||||||
ands r7, r7, #0x7
|
ands r7, r7, #0x7
|
||||||
bne extra_top_bottom_y
|
bne extra_top_bottom_y
|
||||||
@@ -184,12 +186,13 @@ copy_left_right_uv
|
|||||||
;Now copy the top and bottom source lines into each line of the respective borders
|
;Now copy the top and bottom source lines into each line of the respective borders
|
||||||
mov r1, r7
|
mov r1, r7
|
||||||
mul r8, r4, lr ; plane_height * plane_stride
|
mul r8, r4, lr ; plane_height * plane_stride
|
||||||
mov r12, lr, lsr #6 ; plane_stride / 64
|
movs r12, lr, lsr #6 ; plane_stride / 64
|
||||||
|
|
||||||
sub r1, r1, #16 ; src_ptr1 = u_buffer - Border
|
sub r1, r1, #16 ; src_ptr1 = u_buffer - Border
|
||||||
add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)
|
add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)
|
||||||
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
|
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
|
||||||
sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
|
sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
|
||||||
|
ble extra_uv_copy_needed ; plane_stride < 64
|
||||||
|
|
||||||
copy_top_bottom_uv
|
copy_top_bottom_uv
|
||||||
vld1.8 {q0, q1}, [r1]!
|
vld1.8 {q0, q1}, [r1]!
|
||||||
@@ -219,7 +222,7 @@ top_bottom_16
|
|||||||
|
|
||||||
subs r12, r12, #1
|
subs r12, r12, #1
|
||||||
bne copy_top_bottom_uv
|
bne copy_top_bottom_uv
|
||||||
|
extra_uv_copy_needed
|
||||||
mov r7, lr, lsr #3 ; check to see if extra copy is needed
|
mov r7, lr, lsr #3 ; check to see if extra copy is needed
|
||||||
ands r7, r7, #0x7
|
ands r7, r7, #0x7
|
||||||
bne extra_top_bottom_uv
|
bne extra_top_bottom_uv
|
||||||
|
@@ -13,13 +13,13 @@
|
|||||||
#include "vpx_mem/vpx_mem.h"
|
#include "vpx_mem/vpx_mem.h"
|
||||||
#include "vpx_scale/vpxscale.h"
|
#include "vpx_scale/vpxscale.h"
|
||||||
|
|
||||||
void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
|
extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc,
|
||||||
|
YV12_BUFFER_CONFIG *dst_ybc);
|
||||||
|
|
||||||
void
|
void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc,
|
||||||
vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
|
YV12_BUFFER_CONFIG *dst_ybc)
|
||||||
{
|
{
|
||||||
vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
|
vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc);
|
||||||
//printf("Border:%d; plane_stride:%d; plane_height:%d; plane_width:%d\n",dst_ybc->border,dst_ybc->y_stride,dst_ybc->y_height,dst_ybc->y_width);
|
|
||||||
|
|
||||||
vp8_yv12_extend_frame_borders_ptr(dst_ybc);
|
vp8_yv12_extend_frame_borders_neon(dst_ybc);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user