Merge remote branch 'origin/master' into experimental
Change-Id: I3f64e220b78738e5261a9fda3c270d51613f4faa
This commit is contained in:
commit
86edcb0cc7
@ -54,9 +54,11 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
|||||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
|
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
|
||||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
|
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
|
||||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
|
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
|
||||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
|
rtcd->loopfilter.simple_mb_v =
|
||||||
|
vp8_loop_filter_simple_vertical_edge_armv6;
|
||||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
|
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
|
||||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
|
rtcd->loopfilter.simple_mb_h =
|
||||||
|
vp8_loop_filter_simple_horizontal_edge_armv6;
|
||||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
|
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
|
||||||
|
|
||||||
rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
|
rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
|
||||||
|
@ -53,14 +53,11 @@ count RN r5
|
|||||||
|
|
||||||
;r0 unsigned char *src_ptr,
|
;r0 unsigned char *src_ptr,
|
||||||
;r1 int src_pixel_step,
|
;r1 int src_pixel_step,
|
||||||
;r2 const char *flimit,
|
;r2 const char *blimit,
|
||||||
;r3 const char *limit,
|
;r3 const char *limit,
|
||||||
;stack const char *thresh,
|
;stack const char *thresh,
|
||||||
;stack int count
|
;stack int count
|
||||||
|
|
||||||
;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
|
|
||||||
;for flimit. Same way applies to limit and thresh.
|
|
||||||
|
|
||||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||||
|vp8_loop_filter_horizontal_edge_armv6| PROC
|
|vp8_loop_filter_horizontal_edge_armv6| PROC
|
||||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||||
@ -72,14 +69,18 @@ count RN r5
|
|||||||
sub sp, sp, #16 ; create temp buffer
|
sub sp, sp, #16 ; create temp buffer
|
||||||
|
|
||||||
ldr r9, [src], pstep ; p3
|
ldr r9, [src], pstep ; p3
|
||||||
ldr r4, [r2], #4 ; flimit
|
ldrb r4, [r2] ; blimit
|
||||||
ldr r10, [src], pstep ; p2
|
ldr r10, [src], pstep ; p2
|
||||||
ldr r2, [r3], #4 ; limit
|
ldrb r2, [r3] ; limit
|
||||||
ldr r11, [src], pstep ; p1
|
ldr r11, [src], pstep ; p1
|
||||||
uadd8 r4, r4, r4 ; flimit * 2
|
orr r4, r4, r4, lsl #8
|
||||||
ldr r3, [r6], #4 ; thresh
|
ldrb r3, [r6] ; thresh
|
||||||
|
orr r2, r2, r2, lsl #8
|
||||||
mov count, count, lsl #1 ; 4-in-parallel
|
mov count, count, lsl #1 ; 4-in-parallel
|
||||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
orr r4, r4, r4, lsl #16
|
||||||
|
orr r3, r3, r3, lsl #8
|
||||||
|
orr r2, r2, r2, lsl #16
|
||||||
|
orr r3, r3, r3, lsl #16
|
||||||
|
|
||||||
|Hnext8|
|
|Hnext8|
|
||||||
; vp8_filter_mask() function
|
; vp8_filter_mask() function
|
||||||
@ -275,14 +276,18 @@ count RN r5
|
|||||||
sub sp, sp, #16 ; create temp buffer
|
sub sp, sp, #16 ; create temp buffer
|
||||||
|
|
||||||
ldr r9, [src], pstep ; p3
|
ldr r9, [src], pstep ; p3
|
||||||
ldr r4, [r2], #4 ; flimit
|
ldrb r4, [r2] ; blimit
|
||||||
ldr r10, [src], pstep ; p2
|
ldr r10, [src], pstep ; p2
|
||||||
ldr r2, [r3], #4 ; limit
|
ldrb r2, [r3] ; limit
|
||||||
ldr r11, [src], pstep ; p1
|
ldr r11, [src], pstep ; p1
|
||||||
uadd8 r4, r4, r4 ; flimit * 2
|
orr r4, r4, r4, lsl #8
|
||||||
ldr r3, [r6], #4 ; thresh
|
ldrb r3, [r6] ; thresh
|
||||||
|
orr r2, r2, r2, lsl #8
|
||||||
mov count, count, lsl #1 ; 4-in-parallel
|
mov count, count, lsl #1 ; 4-in-parallel
|
||||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
orr r4, r4, r4, lsl #16
|
||||||
|
orr r3, r3, r3, lsl #8
|
||||||
|
orr r2, r2, r2, lsl #16
|
||||||
|
orr r3, r3, r3, lsl #16
|
||||||
|
|
||||||
|MBHnext8|
|
|MBHnext8|
|
||||||
|
|
||||||
@ -584,15 +589,19 @@ count RN r5
|
|||||||
sub sp, sp, #16 ; create temp buffer
|
sub sp, sp, #16 ; create temp buffer
|
||||||
|
|
||||||
ldr r6, [src], pstep ; load source data
|
ldr r6, [src], pstep ; load source data
|
||||||
ldr r4, [r2], #4 ; flimit
|
ldrb r4, [r2] ; blimit
|
||||||
ldr r7, [src], pstep
|
ldr r7, [src], pstep
|
||||||
ldr r2, [r3], #4 ; limit
|
ldrb r2, [r3] ; limit
|
||||||
ldr r8, [src], pstep
|
ldr r8, [src], pstep
|
||||||
uadd8 r4, r4, r4 ; flimit * 2
|
orr r4, r4, r4, lsl #8
|
||||||
ldr r3, [r12], #4 ; thresh
|
ldrb r3, [r12] ; thresh
|
||||||
|
orr r2, r2, r2, lsl #8
|
||||||
ldr lr, [src], pstep
|
ldr lr, [src], pstep
|
||||||
mov count, count, lsl #1 ; 4-in-parallel
|
mov count, count, lsl #1 ; 4-in-parallel
|
||||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
orr r4, r4, r4, lsl #16
|
||||||
|
orr r3, r3, r3, lsl #8
|
||||||
|
orr r2, r2, r2, lsl #16
|
||||||
|
orr r3, r3, r3, lsl #16
|
||||||
|
|
||||||
|Vnext8|
|
|Vnext8|
|
||||||
|
|
||||||
@ -855,18 +864,22 @@ count RN r5
|
|||||||
sub sp, sp, #16 ; create temp buffer
|
sub sp, sp, #16 ; create temp buffer
|
||||||
|
|
||||||
ldr r6, [src], pstep ; load source data
|
ldr r6, [src], pstep ; load source data
|
||||||
ldr r4, [r2], #4 ; flimit
|
ldrb r4, [r2] ; blimit
|
||||||
pld [src, #23]
|
pld [src, #23]
|
||||||
ldr r7, [src], pstep
|
ldr r7, [src], pstep
|
||||||
ldr r2, [r3], #4 ; limit
|
ldrb r2, [r3] ; limit
|
||||||
pld [src, #23]
|
pld [src, #23]
|
||||||
ldr r8, [src], pstep
|
ldr r8, [src], pstep
|
||||||
uadd8 r4, r4, r4 ; flimit * 2
|
orr r4, r4, r4, lsl #8
|
||||||
ldr r3, [r12], #4 ; thresh
|
ldrb r3, [r12] ; thresh
|
||||||
|
orr r2, r2, r2, lsl #8
|
||||||
pld [src, #23]
|
pld [src, #23]
|
||||||
ldr lr, [src], pstep
|
ldr lr, [src], pstep
|
||||||
mov count, count, lsl #1 ; 4-in-parallel
|
mov count, count, lsl #1 ; 4-in-parallel
|
||||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
orr r4, r4, r4, lsl #16
|
||||||
|
orr r3, r3, r3, lsl #8
|
||||||
|
orr r2, r2, r2, lsl #16
|
||||||
|
orr r3, r3, r3, lsl #16
|
||||||
|
|
||||||
|MBVnext8|
|
|MBVnext8|
|
||||||
; vp8_filter_mask() function
|
; vp8_filter_mask() function
|
||||||
@ -906,6 +919,7 @@ count RN r5
|
|||||||
str lr, [sp, #8]
|
str lr, [sp, #8]
|
||||||
ldr lr, [src], pstep
|
ldr lr, [src], pstep
|
||||||
|
|
||||||
|
|
||||||
TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12
|
TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12
|
||||||
|
|
||||||
ldr lr, [sp, #8] ; load back (f)limit accumulator
|
ldr lr, [sp, #8] ; load back (f)limit accumulator
|
||||||
@ -954,6 +968,7 @@ count RN r5
|
|||||||
beq mbvskip_filter ; skip filtering
|
beq mbvskip_filter ; skip filtering
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;vp8_hevmask() function
|
;vp8_hevmask() function
|
||||||
;calculate high edge variance
|
;calculate high edge variance
|
||||||
|
|
||||||
@ -1121,6 +1136,7 @@ count RN r5
|
|||||||
smlabb r8, r6, lr, r7
|
smlabb r8, r6, lr, r7
|
||||||
smlatb r6, r6, lr, r7
|
smlatb r6, r6, lr, r7
|
||||||
smlabb r9, r10, lr, r7
|
smlabb r9, r10, lr, r7
|
||||||
|
|
||||||
smlatb r10, r10, lr, r7
|
smlatb r10, r10, lr, r7
|
||||||
ssat r8, #8, r8, asr #7
|
ssat r8, #8, r8, asr #7
|
||||||
ssat r6, #8, r6, asr #7
|
ssat r6, #8, r6, asr #7
|
||||||
|
@ -45,35 +45,28 @@
|
|||||||
MEND
|
MEND
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
src RN r0
|
src RN r0
|
||||||
pstep RN r1
|
pstep RN r1
|
||||||
|
|
||||||
;r0 unsigned char *src_ptr,
|
;r0 unsigned char *src_ptr,
|
||||||
;r1 int src_pixel_step,
|
;r1 int src_pixel_step,
|
||||||
;r2 const char *flimit,
|
;r2 const char *blimit
|
||||||
;r3 const char *limit,
|
|
||||||
;stack const char *thresh,
|
|
||||||
;stack int count
|
|
||||||
|
|
||||||
; All 16 elements in flimit are equal. So, in the code, only one load is needed
|
|
||||||
; for flimit. Same applies to limit. thresh is not used in simple looopfilter
|
|
||||||
|
|
||||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||||
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
||||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||||
stmdb sp!, {r4 - r11, lr}
|
stmdb sp!, {r4 - r11, lr}
|
||||||
|
|
||||||
ldr r12, [r3] ; limit
|
ldrb r12, [r2] ; blimit
|
||||||
ldr r3, [src, -pstep, lsl #1] ; p1
|
ldr r3, [src, -pstep, lsl #1] ; p1
|
||||||
ldr r4, [src, -pstep] ; p0
|
ldr r4, [src, -pstep] ; p0
|
||||||
ldr r5, [src] ; q0
|
ldr r5, [src] ; q0
|
||||||
ldr r6, [src, pstep] ; q1
|
ldr r6, [src, pstep] ; q1
|
||||||
ldr r7, [r2] ; flimit
|
orr r12, r12, r12, lsl #8 ; blimit
|
||||||
ldr r2, c0x80808080
|
ldr r2, c0x80808080
|
||||||
ldr r9, [sp, #40] ; count for 8-in-parallel
|
orr r12, r12, r12, lsl #16 ; blimit
|
||||||
uadd8 r7, r7, r7 ; flimit * 2
|
mov r9, #4 ; double the count. we're doing 4 at a time
|
||||||
mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
|
|
||||||
uadd8 r12, r7, r12 ; flimit * 2 + limit
|
|
||||||
mov lr, #0 ; need 0 in a couple places
|
mov lr, #0 ; need 0 in a couple places
|
||||||
|
|
||||||
|simple_hnext8|
|
|simple_hnext8|
|
||||||
@ -148,34 +141,32 @@ pstep RN r1
|
|||||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||||
stmdb sp!, {r4 - r11, lr}
|
stmdb sp!, {r4 - r11, lr}
|
||||||
|
|
||||||
ldr r12, [r2] ; r12: flimit
|
ldrb r12, [r2] ; r12: blimit
|
||||||
ldr r2, c0x80808080
|
ldr r2, c0x80808080
|
||||||
ldr r7, [r3] ; limit
|
orr r12, r12, r12, lsl #8
|
||||||
|
|
||||||
; load soure data to r7, r8, r9, r10
|
; load soure data to r7, r8, r9, r10
|
||||||
ldrh r3, [src, #-2]
|
ldrh r3, [src, #-2]
|
||||||
pld [src, #23] ; preload for next block
|
pld [src, #23] ; preload for next block
|
||||||
ldrh r4, [src], pstep
|
ldrh r4, [src], pstep
|
||||||
uadd8 r12, r12, r12 ; flimit * 2
|
orr r12, r12, r12, lsl #16
|
||||||
|
|
||||||
ldrh r5, [src, #-2]
|
ldrh r5, [src, #-2]
|
||||||
pld [src, #23]
|
pld [src, #23]
|
||||||
ldrh r6, [src], pstep
|
ldrh r6, [src], pstep
|
||||||
uadd8 r12, r12, r7 ; flimit * 2 + limit
|
|
||||||
|
|
||||||
pkhbt r7, r3, r4, lsl #16
|
pkhbt r7, r3, r4, lsl #16
|
||||||
|
|
||||||
ldrh r3, [src, #-2]
|
ldrh r3, [src, #-2]
|
||||||
pld [src, #23]
|
pld [src, #23]
|
||||||
ldrh r4, [src], pstep
|
ldrh r4, [src], pstep
|
||||||
ldr r11, [sp, #40] ; count (r11) for 8-in-parallel
|
|
||||||
|
|
||||||
pkhbt r8, r5, r6, lsl #16
|
pkhbt r8, r5, r6, lsl #16
|
||||||
|
|
||||||
ldrh r5, [src, #-2]
|
ldrh r5, [src, #-2]
|
||||||
pld [src, #23]
|
pld [src, #23]
|
||||||
ldrh r6, [src], pstep
|
ldrh r6, [src], pstep
|
||||||
mov r11, r11, lsl #1 ; 4-in-parallel
|
mov r11, #4 ; double the count. we're doing 4 at a time
|
||||||
|
|
||||||
|simple_vnext8|
|
|simple_vnext8|
|
||||||
; vp8_simple_filter_mask() function
|
; vp8_simple_filter_mask() function
|
||||||
|
@ -9,30 +9,34 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "vpx_ports/config.h"
|
#include "vpx_config.h"
|
||||||
#include <math.h>
|
|
||||||
#include "vp8/common/loopfilter.h"
|
#include "vp8/common/loopfilter.h"
|
||||||
#include "vp8/common/onyxc_int.h"
|
#include "vp8/common/onyxc_int.h"
|
||||||
|
|
||||||
|
#if HAVE_ARMV6
|
||||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
||||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
||||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
#endif
|
||||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
|
||||||
|
|
||||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_y_neon);
|
#if HAVE_ARMV7
|
||||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_y_neon);
|
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
|
||||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_y_neon);
|
unsigned char blimit, unsigned char limit, unsigned char thresh);
|
||||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_y_neon);
|
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
|
||||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_neon);
|
unsigned char blimit, unsigned char limit, unsigned char thresh,
|
||||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_neon);
|
unsigned char *v);
|
||||||
|
|
||||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_neon;
|
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
|
||||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_neon;
|
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
|
||||||
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_neon;
|
extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
|
||||||
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
|
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
|
||||||
|
|
||||||
|
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
|
||||||
|
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
|
||||||
|
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
|
||||||
|
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if HAVE_ARMV6
|
#if HAVE_ARMV6
|
||||||
/*ARMV6 loopfilter functions*/
|
/*ARMV6 loopfilter functions*/
|
||||||
@ -40,96 +44,72 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
|
|||||||
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Vertical MB Filtering */
|
/* Vertical MB Filtering */
|
||||||
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Horizontal B Filtering */
|
/* Horizontal B Filtering */
|
||||||
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
const unsigned char *blimit)
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Vertical B Filtering */
|
/* Vertical B Filtering */
|
||||||
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
const unsigned char *blimit)
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -139,83 +119,58 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
|||||||
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
unsigned char mblim = *lfi->mblim;
|
||||||
|
unsigned char lim = *lfi->lim;
|
||||||
|
unsigned char hev_thr = *lfi->hev_thr;
|
||||||
|
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Vertical MB Filtering */
|
/* Vertical MB Filtering */
|
||||||
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
unsigned char mblim = *lfi->mblim;
|
||||||
|
unsigned char lim = *lfi->lim;
|
||||||
|
unsigned char hev_thr = *lfi->hev_thr;
|
||||||
|
|
||||||
|
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Horizontal B Filtering */
|
/* Horizontal B Filtering */
|
||||||
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
unsigned char blim = *lfi->blim;
|
||||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
unsigned char lim = *lfi->lim;
|
||||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
unsigned char hev_thr = *lfi->hev_thr;
|
||||||
|
|
||||||
|
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
|
||||||
|
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
|
||||||
|
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
|
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Vertical B Filtering */
|
/* Vertical B Filtering */
|
||||||
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
unsigned char blim = *lfi->blim;
|
||||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
unsigned char lim = *lfi->lim;
|
||||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
unsigned char hev_thr = *lfi->hev_thr;
|
||||||
|
|
||||||
|
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
|
||||||
|
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
|
||||||
|
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
|
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -12,15 +12,17 @@
|
|||||||
#ifndef LOOPFILTER_ARM_H
|
#ifndef LOOPFILTER_ARM_H
|
||||||
#define LOOPFILTER_ARM_H
|
#define LOOPFILTER_ARM_H
|
||||||
|
|
||||||
|
#include "vpx_config.h"
|
||||||
|
|
||||||
#if HAVE_ARMV6
|
#if HAVE_ARMV6
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
|
extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
|
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
|
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
|
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_armv6);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_armv6);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_armv6);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
||||||
|
|
||||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||||
#undef vp8_lf_normal_mb_v
|
#undef vp8_lf_normal_mb_v
|
||||||
@ -36,28 +38,29 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
|
|||||||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6
|
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_v
|
#undef vp8_lf_simple_mb_v
|
||||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_armv6
|
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_armv6
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_v
|
#undef vp8_lf_simple_b_v
|
||||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
|
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_h
|
#undef vp8_lf_simple_mb_h
|
||||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_armv6
|
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_armv6
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_h
|
#undef vp8_lf_simple_b_h
|
||||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
|
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
|
||||||
#endif
|
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
|
||||||
#endif
|
|
||||||
|
#endif /* HAVE_ARMV6 */
|
||||||
|
|
||||||
#if HAVE_ARMV7
|
#if HAVE_ARMV7
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
|
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_neon);
|
extern prototype_loopfilter_block(vp8_loop_filter_bv_neon);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_neon);
|
extern prototype_loopfilter_block(vp8_loop_filter_mbh_neon);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_neon);
|
extern prototype_loopfilter_block(vp8_loop_filter_bh_neon);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_neon);
|
extern prototype_simple_loopfilter(vp8_loop_filter_mbvs_neon);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_neon);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
|
extern prototype_simple_loopfilter(vp8_loop_filter_mbhs_neon);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_neon);
|
||||||
|
|
||||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||||
#undef vp8_lf_normal_mb_v
|
#undef vp8_lf_normal_mb_v
|
||||||
@ -83,7 +86,8 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
|
|||||||
|
|
||||||
#undef vp8_lf_simple_b_h
|
#undef vp8_lf_simple_b_h
|
||||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
|
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
|
||||||
#endif
|
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
#endif /* HAVE_ARMV7 */
|
||||||
|
|
||||||
|
#endif /* LOOPFILTER_ARM_H */
|
||||||
|
@ -14,109 +14,97 @@
|
|||||||
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
|
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
|
||||||
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
|
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
|
||||||
ARM
|
ARM
|
||||||
REQUIRE8
|
|
||||||
PRESERVE8
|
|
||||||
|
|
||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
|
|
||||||
; flimit, limit, and thresh should be positive numbers.
|
|
||||||
; All 16 elements in these variables are equal.
|
|
||||||
|
|
||||||
; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
|
||||||
; const signed char *flimit,
|
|
||||||
; const signed char *limit,
|
|
||||||
; const signed char *thresh,
|
|
||||||
; int count)
|
|
||||||
; r0 unsigned char *src
|
; r0 unsigned char *src
|
||||||
; r1 int pitch
|
; r1 int pitch
|
||||||
; r2 const signed char *flimit
|
; r2 unsigned char blimit
|
||||||
; r3 const signed char *limit
|
; r3 unsigned char limit
|
||||||
; sp const signed char *thresh,
|
; sp unsigned char thresh,
|
||||||
; sp+4 int count (unused)
|
|
||||||
|vp8_loop_filter_horizontal_edge_y_neon| PROC
|
|vp8_loop_filter_horizontal_edge_y_neon| PROC
|
||||||
stmdb sp!, {lr}
|
push {lr}
|
||||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
vdup.u8 q0, r2 ; duplicate blimit
|
||||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
vdup.u8 q1, r3 ; duplicate limit
|
||||||
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
||||||
ldr r12, [sp, #4] ; load thresh pointer
|
ldr r3, [sp, #4] ; load thresh
|
||||||
|
add r12, r2, r1
|
||||||
|
add r1, r1, r1
|
||||||
|
|
||||||
vld1.u8 {q3}, [r2], r1 ; p3
|
vdup.u8 q2, r3 ; duplicate thresh
|
||||||
vld1.u8 {q4}, [r2], r1 ; p2
|
|
||||||
vld1.u8 {q5}, [r2], r1 ; p1
|
vld1.u8 {q3}, [r2@128], r1 ; p3
|
||||||
vld1.u8 {q6}, [r2], r1 ; p0
|
vld1.u8 {q4}, [r12@128], r1 ; p2
|
||||||
vld1.u8 {q7}, [r2], r1 ; q0
|
vld1.u8 {q5}, [r2@128], r1 ; p1
|
||||||
vld1.u8 {q8}, [r2], r1 ; q1
|
vld1.u8 {q6}, [r12@128], r1 ; p0
|
||||||
vld1.u8 {q9}, [r2], r1 ; q2
|
vld1.u8 {q7}, [r2@128], r1 ; q0
|
||||||
vld1.u8 {q10}, [r2] ; q3
|
vld1.u8 {q8}, [r12@128], r1 ; q1
|
||||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
vld1.u8 {q9}, [r2@128] ; q2
|
||||||
sub r0, r0, r1, lsl #1
|
vld1.u8 {q10}, [r12@128] ; q3
|
||||||
|
|
||||||
|
sub r2, r2, r1, lsl #1
|
||||||
|
sub r12, r12, r1, lsl #1
|
||||||
|
|
||||||
bl vp8_loop_filter_neon
|
bl vp8_loop_filter_neon
|
||||||
|
|
||||||
vst1.u8 {q5}, [r0], r1 ; store op1
|
vst1.u8 {q5}, [r2@128], r1 ; store op1
|
||||||
vst1.u8 {q6}, [r0], r1 ; store op0
|
vst1.u8 {q6}, [r12@128], r1 ; store op0
|
||||||
vst1.u8 {q7}, [r0], r1 ; store oq0
|
vst1.u8 {q7}, [r2@128], r1 ; store oq0
|
||||||
vst1.u8 {q8}, [r0], r1 ; store oq1
|
vst1.u8 {q8}, [r12@128], r1 ; store oq1
|
||||||
|
|
||||||
ldmia sp!, {pc}
|
pop {pc}
|
||||||
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
|
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
|
||||||
|
|
||||||
; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch
|
|
||||||
; const signed char *flimit,
|
|
||||||
; const signed char *limit,
|
|
||||||
; const signed char *thresh,
|
|
||||||
; unsigned char *v)
|
|
||||||
; r0 unsigned char *u,
|
; r0 unsigned char *u,
|
||||||
; r1 int pitch,
|
; r1 int pitch,
|
||||||
; r2 const signed char *flimit,
|
; r2 unsigned char blimit
|
||||||
; r3 const signed char *limit,
|
; r3 unsigned char limit
|
||||||
; sp const signed char *thresh,
|
; sp unsigned char thresh,
|
||||||
; sp+4 unsigned char *v
|
; sp+4 unsigned char *v
|
||||||
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
|
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
|
||||||
stmdb sp!, {lr}
|
push {lr}
|
||||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
vdup.u8 q0, r2 ; duplicate blimit
|
||||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
vdup.u8 q1, r3 ; duplicate limit
|
||||||
|
ldr r12, [sp, #4] ; load thresh
|
||||||
ldr r2, [sp, #8] ; load v ptr
|
ldr r2, [sp, #8] ; load v ptr
|
||||||
|
vdup.u8 q2, r12 ; duplicate thresh
|
||||||
|
|
||||||
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||||
vld1.u8 {d6}, [r3], r1 ; p3
|
|
||||||
vld1.u8 {d8}, [r3], r1 ; p2
|
|
||||||
vld1.u8 {d10}, [r3], r1 ; p1
|
|
||||||
vld1.u8 {d12}, [r3], r1 ; p0
|
|
||||||
vld1.u8 {d14}, [r3], r1 ; q0
|
|
||||||
vld1.u8 {d16}, [r3], r1 ; q1
|
|
||||||
vld1.u8 {d18}, [r3], r1 ; q2
|
|
||||||
vld1.u8 {d20}, [r3] ; q3
|
|
||||||
|
|
||||||
ldr r3, [sp, #4] ; load thresh pointer
|
|
||||||
|
|
||||||
sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines
|
sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines
|
||||||
vld1.u8 {d7}, [r12], r1 ; p3
|
|
||||||
vld1.u8 {d9}, [r12], r1 ; p2
|
|
||||||
vld1.u8 {d11}, [r12], r1 ; p1
|
|
||||||
vld1.u8 {d13}, [r12], r1 ; p0
|
|
||||||
vld1.u8 {d15}, [r12], r1 ; q0
|
|
||||||
vld1.u8 {d17}, [r12], r1 ; q1
|
|
||||||
vld1.u8 {d19}, [r12], r1 ; q2
|
|
||||||
vld1.u8 {d21}, [r12] ; q3
|
|
||||||
|
|
||||||
vld1.s8 {d4[], d5[]}, [r3] ; thresh
|
vld1.u8 {d6}, [r3@64], r1 ; p3
|
||||||
|
vld1.u8 {d7}, [r12@64], r1 ; p3
|
||||||
|
vld1.u8 {d8}, [r3@64], r1 ; p2
|
||||||
|
vld1.u8 {d9}, [r12@64], r1 ; p2
|
||||||
|
vld1.u8 {d10}, [r3@64], r1 ; p1
|
||||||
|
vld1.u8 {d11}, [r12@64], r1 ; p1
|
||||||
|
vld1.u8 {d12}, [r3@64], r1 ; p0
|
||||||
|
vld1.u8 {d13}, [r12@64], r1 ; p0
|
||||||
|
vld1.u8 {d14}, [r3@64], r1 ; q0
|
||||||
|
vld1.u8 {d15}, [r12@64], r1 ; q0
|
||||||
|
vld1.u8 {d16}, [r3@64], r1 ; q1
|
||||||
|
vld1.u8 {d17}, [r12@64], r1 ; q1
|
||||||
|
vld1.u8 {d18}, [r3@64], r1 ; q2
|
||||||
|
vld1.u8 {d19}, [r12@64], r1 ; q2
|
||||||
|
vld1.u8 {d20}, [r3@64] ; q3
|
||||||
|
vld1.u8 {d21}, [r12@64] ; q3
|
||||||
|
|
||||||
bl vp8_loop_filter_neon
|
bl vp8_loop_filter_neon
|
||||||
|
|
||||||
sub r0, r0, r1, lsl #1
|
sub r0, r0, r1, lsl #1
|
||||||
sub r2, r2, r1, lsl #1
|
sub r2, r2, r1, lsl #1
|
||||||
|
|
||||||
vst1.u8 {d10}, [r0], r1 ; store u op1
|
vst1.u8 {d10}, [r0@64], r1 ; store u op1
|
||||||
vst1.u8 {d11}, [r2], r1 ; store v op1
|
vst1.u8 {d11}, [r2@64], r1 ; store v op1
|
||||||
vst1.u8 {d12}, [r0], r1 ; store u op0
|
vst1.u8 {d12}, [r0@64], r1 ; store u op0
|
||||||
vst1.u8 {d13}, [r2], r1 ; store v op0
|
vst1.u8 {d13}, [r2@64], r1 ; store v op0
|
||||||
vst1.u8 {d14}, [r0], r1 ; store u oq0
|
vst1.u8 {d14}, [r0@64], r1 ; store u oq0
|
||||||
vst1.u8 {d15}, [r2], r1 ; store v oq0
|
vst1.u8 {d15}, [r2@64], r1 ; store v oq0
|
||||||
vst1.u8 {d16}, [r0] ; store u oq1
|
vst1.u8 {d16}, [r0@64] ; store u oq1
|
||||||
vst1.u8 {d17}, [r2] ; store v oq1
|
vst1.u8 {d17}, [r2@64] ; store v oq1
|
||||||
|
|
||||||
ldmia sp!, {pc}
|
pop {pc}
|
||||||
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
|
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
|
||||||
|
|
||||||
; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||||
@ -124,39 +112,38 @@
|
|||||||
; const signed char *limit,
|
; const signed char *limit,
|
||||||
; const signed char *thresh,
|
; const signed char *thresh,
|
||||||
; int count)
|
; int count)
|
||||||
; r0 unsigned char *src,
|
; r0 unsigned char *src
|
||||||
; r1 int pitch,
|
; r1 int pitch
|
||||||
; r2 const signed char *flimit,
|
; r2 unsigned char blimit
|
||||||
; r3 const signed char *limit,
|
; r3 unsigned char limit
|
||||||
; sp const signed char *thresh,
|
; sp unsigned char thresh,
|
||||||
; sp+4 int count (unused)
|
|
||||||
|vp8_loop_filter_vertical_edge_y_neon| PROC
|
|vp8_loop_filter_vertical_edge_y_neon| PROC
|
||||||
stmdb sp!, {lr}
|
push {lr}
|
||||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
vdup.u8 q0, r2 ; duplicate blimit
|
||||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
vdup.u8 q1, r3 ; duplicate limit
|
||||||
sub r2, r0, #4 ; src ptr down by 4 columns
|
sub r2, r0, #4 ; src ptr down by 4 columns
|
||||||
sub r0, r0, #2 ; dst ptr
|
add r1, r1, r1
|
||||||
ldr r12, [sp, #4] ; load thresh pointer
|
ldr r3, [sp, #4] ; load thresh
|
||||||
|
add r12, r2, r1, asr #1
|
||||||
|
|
||||||
vld1.u8 {d6}, [r2], r1 ; load first 8-line src data
|
vld1.u8 {d6}, [r2], r1
|
||||||
vld1.u8 {d8}, [r2], r1
|
vld1.u8 {d8}, [r12], r1
|
||||||
vld1.u8 {d10}, [r2], r1
|
vld1.u8 {d10}, [r2], r1
|
||||||
vld1.u8 {d12}, [r2], r1
|
vld1.u8 {d12}, [r12], r1
|
||||||
vld1.u8 {d14}, [r2], r1
|
vld1.u8 {d14}, [r2], r1
|
||||||
vld1.u8 {d16}, [r2], r1
|
vld1.u8 {d16}, [r12], r1
|
||||||
vld1.u8 {d18}, [r2], r1
|
vld1.u8 {d18}, [r2], r1
|
||||||
vld1.u8 {d20}, [r2], r1
|
vld1.u8 {d20}, [r12], r1
|
||||||
|
|
||||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
|
||||||
|
|
||||||
vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
|
vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
|
||||||
vld1.u8 {d9}, [r2], r1
|
vld1.u8 {d9}, [r12], r1
|
||||||
vld1.u8 {d11}, [r2], r1
|
vld1.u8 {d11}, [r2], r1
|
||||||
vld1.u8 {d13}, [r2], r1
|
vld1.u8 {d13}, [r12], r1
|
||||||
vld1.u8 {d15}, [r2], r1
|
vld1.u8 {d15}, [r2], r1
|
||||||
vld1.u8 {d17}, [r2], r1
|
vld1.u8 {d17}, [r12], r1
|
||||||
vld1.u8 {d19}, [r2], r1
|
vld1.u8 {d19}, [r2]
|
||||||
vld1.u8 {d21}, [r2]
|
vld1.u8 {d21}, [r12]
|
||||||
|
|
||||||
;transpose to 8x16 matrix
|
;transpose to 8x16 matrix
|
||||||
vtrn.32 q3, q7
|
vtrn.32 q3, q7
|
||||||
@ -164,6 +151,8 @@
|
|||||||
vtrn.32 q5, q9
|
vtrn.32 q5, q9
|
||||||
vtrn.32 q6, q10
|
vtrn.32 q6, q10
|
||||||
|
|
||||||
|
vdup.u8 q2, r3 ; duplicate thresh
|
||||||
|
|
||||||
vtrn.16 q3, q5
|
vtrn.16 q3, q5
|
||||||
vtrn.16 q4, q6
|
vtrn.16 q4, q6
|
||||||
vtrn.16 q7, q9
|
vtrn.16 q7, q9
|
||||||
@ -178,28 +167,34 @@
|
|||||||
|
|
||||||
vswp d12, d11
|
vswp d12, d11
|
||||||
vswp d16, d13
|
vswp d16, d13
|
||||||
|
|
||||||
|
sub r0, r0, #2 ; dst ptr
|
||||||
|
|
||||||
vswp d14, d12
|
vswp d14, d12
|
||||||
vswp d16, d15
|
vswp d16, d15
|
||||||
|
|
||||||
|
add r12, r0, r1, asr #1
|
||||||
|
|
||||||
;store op1, op0, oq0, oq1
|
;store op1, op0, oq0, oq1
|
||||||
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||||
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r12], r1
|
||||||
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||||
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
|
||||||
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||||
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r12], r1
|
||||||
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
||||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
|
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r12], r1
|
||||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
|
|
||||||
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
|
|
||||||
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
|
|
||||||
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
|
|
||||||
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
|
|
||||||
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
|
|
||||||
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
|
|
||||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0]
|
|
||||||
|
|
||||||
ldmia sp!, {pc}
|
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
|
||||||
|
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r12], r1
|
||||||
|
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
|
||||||
|
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
|
||||||
|
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
|
||||||
|
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r12], r1
|
||||||
|
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0]
|
||||||
|
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12]
|
||||||
|
|
||||||
|
pop {pc}
|
||||||
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
|
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
|
||||||
|
|
||||||
; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
|
; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
|
||||||
@ -209,38 +204,36 @@
|
|||||||
; unsigned char *v)
|
; unsigned char *v)
|
||||||
; r0 unsigned char *u,
|
; r0 unsigned char *u,
|
||||||
; r1 int pitch,
|
; r1 int pitch,
|
||||||
; r2 const signed char *flimit,
|
; r2 unsigned char blimit
|
||||||
; r3 const signed char *limit,
|
; r3 unsigned char limit
|
||||||
; sp const signed char *thresh,
|
; sp unsigned char thresh,
|
||||||
; sp+4 unsigned char *v
|
; sp+4 unsigned char *v
|
||||||
|vp8_loop_filter_vertical_edge_uv_neon| PROC
|
|vp8_loop_filter_vertical_edge_uv_neon| PROC
|
||||||
stmdb sp!, {lr}
|
push {lr}
|
||||||
sub r12, r0, #4 ; move u pointer down by 4 columns
|
vdup.u8 q0, r2 ; duplicate blimit
|
||||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
sub r12, r0, #4 ; move u pointer down by 4 columns
|
||||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
|
||||||
|
|
||||||
ldr r2, [sp, #8] ; load v ptr
|
ldr r2, [sp, #8] ; load v ptr
|
||||||
|
vdup.u8 q1, r3 ; duplicate limit
|
||||||
vld1.u8 {d6}, [r12], r1 ;load u data
|
|
||||||
vld1.u8 {d8}, [r12], r1
|
|
||||||
vld1.u8 {d10}, [r12], r1
|
|
||||||
vld1.u8 {d12}, [r12], r1
|
|
||||||
vld1.u8 {d14}, [r12], r1
|
|
||||||
vld1.u8 {d16}, [r12], r1
|
|
||||||
vld1.u8 {d18}, [r12], r1
|
|
||||||
vld1.u8 {d20}, [r12]
|
|
||||||
|
|
||||||
sub r3, r2, #4 ; move v pointer down by 4 columns
|
sub r3, r2, #4 ; move v pointer down by 4 columns
|
||||||
|
|
||||||
|
vld1.u8 {d6}, [r12], r1 ;load u data
|
||||||
vld1.u8 {d7}, [r3], r1 ;load v data
|
vld1.u8 {d7}, [r3], r1 ;load v data
|
||||||
|
vld1.u8 {d8}, [r12], r1
|
||||||
vld1.u8 {d9}, [r3], r1
|
vld1.u8 {d9}, [r3], r1
|
||||||
|
vld1.u8 {d10}, [r12], r1
|
||||||
vld1.u8 {d11}, [r3], r1
|
vld1.u8 {d11}, [r3], r1
|
||||||
|
vld1.u8 {d12}, [r12], r1
|
||||||
vld1.u8 {d13}, [r3], r1
|
vld1.u8 {d13}, [r3], r1
|
||||||
|
vld1.u8 {d14}, [r12], r1
|
||||||
vld1.u8 {d15}, [r3], r1
|
vld1.u8 {d15}, [r3], r1
|
||||||
|
vld1.u8 {d16}, [r12], r1
|
||||||
vld1.u8 {d17}, [r3], r1
|
vld1.u8 {d17}, [r3], r1
|
||||||
|
vld1.u8 {d18}, [r12], r1
|
||||||
vld1.u8 {d19}, [r3], r1
|
vld1.u8 {d19}, [r3], r1
|
||||||
|
vld1.u8 {d20}, [r12]
|
||||||
vld1.u8 {d21}, [r3]
|
vld1.u8 {d21}, [r3]
|
||||||
|
|
||||||
ldr r12, [sp, #4] ; load thresh pointer
|
ldr r12, [sp, #4] ; load thresh
|
||||||
|
|
||||||
;transpose to 8x16 matrix
|
;transpose to 8x16 matrix
|
||||||
vtrn.32 q3, q7
|
vtrn.32 q3, q7
|
||||||
@ -248,6 +241,8 @@
|
|||||||
vtrn.32 q5, q9
|
vtrn.32 q5, q9
|
||||||
vtrn.32 q6, q10
|
vtrn.32 q6, q10
|
||||||
|
|
||||||
|
vdup.u8 q2, r12 ; duplicate thresh
|
||||||
|
|
||||||
vtrn.16 q3, q5
|
vtrn.16 q3, q5
|
||||||
vtrn.16 q4, q6
|
vtrn.16 q4, q6
|
||||||
vtrn.16 q7, q9
|
vtrn.16 q7, q9
|
||||||
@ -258,18 +253,16 @@
|
|||||||
vtrn.8 q7, q8
|
vtrn.8 q7, q8
|
||||||
vtrn.8 q9, q10
|
vtrn.8 q9, q10
|
||||||
|
|
||||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
|
||||||
|
|
||||||
bl vp8_loop_filter_neon
|
bl vp8_loop_filter_neon
|
||||||
|
|
||||||
sub r0, r0, #2
|
|
||||||
sub r2, r2, #2
|
|
||||||
|
|
||||||
vswp d12, d11
|
vswp d12, d11
|
||||||
vswp d16, d13
|
vswp d16, d13
|
||||||
vswp d14, d12
|
vswp d14, d12
|
||||||
vswp d16, d15
|
vswp d16, d15
|
||||||
|
|
||||||
|
sub r0, r0, #2
|
||||||
|
sub r2, r2, #2
|
||||||
|
|
||||||
;store op1, op0, oq0, oq1
|
;store op1, op0, oq0, oq1
|
||||||
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
|
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
|
||||||
@ -288,7 +281,7 @@
|
|||||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
|
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
|
||||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
|
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
|
||||||
|
|
||||||
ldmia sp!, {pc}
|
pop {pc}
|
||||||
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
|
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
|
||||||
|
|
||||||
; void vp8_loop_filter_neon();
|
; void vp8_loop_filter_neon();
|
||||||
@ -316,42 +309,44 @@
|
|||||||
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
||||||
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
||||||
vabd.u8 q4, q10, q9 ; abs(q3 - q2)
|
vabd.u8 q4, q10, q9 ; abs(q3 - q2)
|
||||||
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
|
|
||||||
|
|
||||||
vmax.u8 q11, q11, q12
|
vmax.u8 q11, q11, q12
|
||||||
vmax.u8 q12, q13, q14
|
vmax.u8 q12, q13, q14
|
||||||
vmax.u8 q3, q3, q4
|
vmax.u8 q3, q3, q4
|
||||||
vmax.u8 q15, q11, q12
|
vmax.u8 q15, q11, q12
|
||||||
|
|
||||||
|
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
|
||||||
|
|
||||||
; vp8_hevmask
|
; vp8_hevmask
|
||||||
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
|
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
|
||||||
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
|
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
|
||||||
vmax.u8 q15, q15, q3
|
vmax.u8 q15, q15, q3
|
||||||
|
|
||||||
vadd.u8 q0, q0, q0 ; flimit * 2
|
vmov.u8 q10, #0x80 ; 0x80
|
||||||
vadd.u8 q0, q0, q1 ; flimit * 2 + limit
|
|
||||||
vcge.u8 q15, q1, q15
|
|
||||||
|
|
||||||
vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
|
vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
|
||||||
vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
|
vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
|
||||||
vshr.u8 q2, q2, #1 ; a = a / 2
|
|
||||||
vqadd.u8 q9, q9, q2 ; a = b + a
|
|
||||||
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
|
|
||||||
|
|
||||||
vmov.u8 q0, #0x80 ; 0x80
|
vcge.u8 q15, q1, q15
|
||||||
|
|
||||||
; vp8_filter() function
|
; vp8_filter() function
|
||||||
; convert to signed
|
; convert to signed
|
||||||
veor q7, q7, q0 ; qs0
|
veor q7, q7, q10 ; qs0
|
||||||
veor q6, q6, q0 ; ps0
|
vshr.u8 q2, q2, #1 ; a = a / 2
|
||||||
veor q5, q5, q0 ; ps1
|
veor q6, q6, q10 ; ps0
|
||||||
veor q8, q8, q0 ; qs1
|
|
||||||
|
veor q5, q5, q10 ; ps1
|
||||||
|
vqadd.u8 q9, q9, q2 ; a = b + a
|
||||||
|
|
||||||
|
veor q8, q8, q10 ; qs1
|
||||||
|
|
||||||
vmov.u8 q10, #3 ; #3
|
vmov.u8 q10, #3 ; #3
|
||||||
|
|
||||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||||
vsubl.s8 q11, d15, d13
|
vsubl.s8 q11, d15, d13
|
||||||
|
|
||||||
|
vcge.u8 q9, q0, q9 ; (a > blimit) * -1
|
||||||
|
|
||||||
vmovl.u8 q4, d20
|
vmovl.u8 q4, d20
|
||||||
|
|
||||||
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
||||||
@ -378,19 +373,20 @@
|
|||||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||||
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
||||||
|
|
||||||
|
|
||||||
vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
|
vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
|
||||||
vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
|
vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
|
||||||
|
|
||||||
; outer tap adjustments: ++vp8_filter >> 1
|
; outer tap adjustments: ++vp8_filter >> 1
|
||||||
vrshr.s8 q1, q1, #1
|
vrshr.s8 q1, q1, #1
|
||||||
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
||||||
|
vmov.u8 q0, #0x80 ; 0x80
|
||||||
vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
|
vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
|
||||||
vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
|
vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
|
||||||
|
|
||||||
veor q5, q13, q0 ; *op1 = u^0x80
|
|
||||||
veor q6, q11, q0 ; *op0 = u^0x80
|
veor q6, q11, q0 ; *op0 = u^0x80
|
||||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||||
|
veor q5, q13, q0 ; *op1 = u^0x80
|
||||||
veor q8, q12, q0 ; *oq1 = u^0x80
|
veor q8, q12, q0 ; *oq1 = u^0x80
|
||||||
|
|
||||||
bx lr
|
bx lr
|
||||||
|
@ -9,99 +9,109 @@
|
|||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
|
;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||||
|
EXPORT |vp8_loop_filter_bhs_neon|
|
||||||
|
EXPORT |vp8_loop_filter_mbhs_neon|
|
||||||
ARM
|
ARM
|
||||||
REQUIRE8
|
|
||||||
PRESERVE8
|
PRESERVE8
|
||||||
|
|
||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
|
|
||||||
;are equal. So, in the code, only one load is needed
|
; r0 unsigned char *s, PRESERVE
|
||||||
;for flimit. Same way applies to limit and thresh.
|
; r1 int p, PRESERVE
|
||||||
; r0 unsigned char *s,
|
; q1 limit, PRESERVE
|
||||||
; r1 int p, //pitch
|
|
||||||
; r2 const signed char *flimit,
|
|
||||||
; r3 const signed char *limit,
|
|
||||||
; stack(r4) const signed char *thresh (unused)
|
|
||||||
; //stack(r5) int count --unused
|
|
||||||
|
|
||||||
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
|
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
|
||||||
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
|
|
||||||
|
|
||||||
vld1.u8 {q5}, [r0], r1 ; p1
|
sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines
|
||||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
|
||||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
vld1.u8 {q7}, [r0@128], r1 ; q0
|
||||||
vld1.u8 {q6}, [r0], r1 ; p0
|
vld1.u8 {q5}, [r3@128], r1 ; p1
|
||||||
vmov.u8 q0, #0x80 ; 0x80
|
vld1.u8 {q8}, [r0@128] ; q1
|
||||||
vld1.u8 {q7}, [r0], r1 ; q0
|
vld1.u8 {q6}, [r3@128] ; p0
|
||||||
vmov.u8 q10, #0x03 ; 0x03
|
|
||||||
vld1.u8 {q8}, [r0] ; q1
|
|
||||||
|
|
||||||
;vp8_filter_mask() function
|
|
||||||
vabd.u8 q15, q6, q7 ; abs(p0 - q0)
|
vabd.u8 q15, q6, q7 ; abs(p0 - q0)
|
||||||
vabd.u8 q14, q5, q8 ; abs(p1 - q1)
|
vabd.u8 q14, q5, q8 ; abs(p1 - q1)
|
||||||
|
|
||||||
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
||||||
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
||||||
|
vmov.u8 q0, #0x80 ; 0x80
|
||||||
|
vmov.s16 q13, #3
|
||||||
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||||
|
|
||||||
;vp8_filter() function
|
|
||||||
veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
|
veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
|
||||||
veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
|
veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
|
||||||
veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
|
veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
|
||||||
veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
|
veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
|
||||||
|
|
||||||
vadd.u8 q1, q1, q1 ; flimit * 2
|
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1
|
||||||
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
|
|
||||||
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
|
|
||||||
|
|
||||||
;;;;;;;;;;
|
|
||||||
;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
|
|
||||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||||
vsubl.s8 q3, d15, d13
|
vsubl.s8 q3, d15, d13
|
||||||
|
|
||||||
vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||||
|
|
||||||
;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
|
vmul.s16 q2, q2, q13 ; 3 * ( qs0 - ps0)
|
||||||
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
|
vmul.s16 q3, q3, q13
|
||||||
vadd.s16 q12, q3, q3
|
|
||||||
|
|
||||||
|
vmov.u8 q10, #0x03 ; 0x03
|
||||||
vmov.u8 q9, #0x04 ; 0x04
|
vmov.u8 q9, #0x04 ; 0x04
|
||||||
|
|
||||||
vadd.s16 q2, q2, q11
|
|
||||||
vadd.s16 q3, q3, q12
|
|
||||||
|
|
||||||
vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
|
vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||||
vaddw.s8 q3, q3, d9
|
vaddw.s8 q3, q3, d9
|
||||||
|
|
||||||
;vqadd.s8 q4, q4, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
|
||||||
vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||||
vqmovn.s16 d9, q3
|
vqmovn.s16 d9, q3
|
||||||
;;;;;;;;;;;;;
|
|
||||||
|
|
||||||
vand q4, q4, q15 ; vp8_filter &= mask
|
vand q14, q4, q15 ; vp8_filter &= mask
|
||||||
|
|
||||||
vqadd.s8 q2, q4, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
vqadd.s8 q2, q14, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||||
vqadd.s8 q4, q4, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
vqadd.s8 q3, q14, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||||
vshr.s8 q4, q4, #3 ; Filter1 >>= 3
|
vshr.s8 q4, q3, #3 ; Filter1 >>= 3
|
||||||
|
|
||||||
sub r0, r0, r1, lsl #1
|
sub r0, r0, r1
|
||||||
|
|
||||||
;calculate output
|
;calculate output
|
||||||
vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
||||||
vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||||
|
|
||||||
add r3, r0, r1
|
|
||||||
|
|
||||||
veor q6, q11, q0 ; *op0 = u^0x80
|
veor q6, q11, q0 ; *op0 = u^0x80
|
||||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||||
|
|
||||||
vst1.u8 {q6}, [r0] ; store op0
|
vst1.u8 {q6}, [r3@128] ; store op0
|
||||||
vst1.u8 {q7}, [r3] ; store oq0
|
vst1.u8 {q7}, [r0@128] ; store oq0
|
||||||
|
|
||||||
bx lr
|
bx lr
|
||||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
|
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||||
|
|
||||||
;-----------------
|
; r0 unsigned char *y
|
||||||
|
; r1 int ystride
|
||||||
|
; r2 const unsigned char *blimit
|
||||||
|
|
||||||
|
|vp8_loop_filter_bhs_neon| PROC
|
||||||
|
push {r4, lr}
|
||||||
|
ldrb r3, [r2] ; load blim from mem
|
||||||
|
vdup.s8 q1, r3 ; duplicate blim
|
||||||
|
|
||||||
|
add r0, r0, r1, lsl #2 ; src = y_ptr + 4 * y_stride
|
||||||
|
bl vp8_loop_filter_simple_horizontal_edge_neon
|
||||||
|
; vp8_loop_filter_simple_horizontal_edge_neon preserves r0, r1 and q1
|
||||||
|
add r0, r0, r1, lsl #2 ; src = y_ptr + 8* y_stride
|
||||||
|
bl vp8_loop_filter_simple_horizontal_edge_neon
|
||||||
|
add r0, r0, r1, lsl #2 ; src = y_ptr + 12 * y_stride
|
||||||
|
pop {r4, lr}
|
||||||
|
b vp8_loop_filter_simple_horizontal_edge_neon
|
||||||
|
ENDP ;|vp8_loop_filter_bhs_neon|
|
||||||
|
|
||||||
|
; r0 unsigned char *y
|
||||||
|
; r1 int ystride
|
||||||
|
; r2 const unsigned char *blimit
|
||||||
|
|
||||||
|
|vp8_loop_filter_mbhs_neon| PROC
|
||||||
|
ldrb r3, [r2] ; load blim from mem
|
||||||
|
vdup.s8 q1, r3 ; duplicate mblim
|
||||||
|
b vp8_loop_filter_simple_horizontal_edge_neon
|
||||||
|
ENDP ;|vp8_loop_filter_mbhs_neon|
|
||||||
|
|
||||||
END
|
END
|
||||||
|
@ -9,59 +9,54 @@
|
|||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
|
;EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
|
||||||
|
EXPORT |vp8_loop_filter_bvs_neon|
|
||||||
|
EXPORT |vp8_loop_filter_mbvs_neon|
|
||||||
ARM
|
ARM
|
||||||
REQUIRE8
|
|
||||||
PRESERVE8
|
PRESERVE8
|
||||||
|
|
||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
|
|
||||||
;are equal. So, in the code, only one load is needed
|
; r0 unsigned char *s, PRESERVE
|
||||||
;for flimit. Same way applies to limit and thresh.
|
; r1 int p, PRESERVE
|
||||||
; r0 unsigned char *s,
|
; q1 limit, PRESERVE
|
||||||
; r1 int p, //pitch
|
|
||||||
; r2 const signed char *flimit,
|
|
||||||
; r3 const signed char *limit,
|
|
||||||
; stack(r4) const signed char *thresh (unused)
|
|
||||||
; //stack(r5) int count --unused
|
|
||||||
|
|
||||||
|vp8_loop_filter_simple_vertical_edge_neon| PROC
|
|vp8_loop_filter_simple_vertical_edge_neon| PROC
|
||||||
sub r0, r0, #2 ; move src pointer down by 2 columns
|
sub r0, r0, #2 ; move src pointer down by 2 columns
|
||||||
|
add r12, r1, r1
|
||||||
|
add r3, r0, r1
|
||||||
|
|
||||||
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
|
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r12
|
||||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r3], r12
|
||||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r12
|
||||||
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
|
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r3], r12
|
||||||
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
|
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r12
|
||||||
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
|
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r3], r12
|
||||||
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
|
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r12
|
||||||
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
|
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r3], r12
|
||||||
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
|
|
||||||
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
|
|
||||||
|
|
||||||
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r12
|
||||||
vmov.u8 q0, #0x80 ; 0x80
|
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r3], r12
|
||||||
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r12
|
||||||
vmov.u8 q11, #0x03 ; 0x03
|
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r3], r12
|
||||||
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r12
|
||||||
vmov.u8 q12, #0x04 ; 0x04
|
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r3], r12
|
||||||
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r12
|
||||||
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r3]
|
||||||
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
|
||||||
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
|
||||||
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
|
|
||||||
|
|
||||||
vswp d7, d10
|
vswp d7, d10
|
||||||
vswp d12, d9
|
vswp d12, d9
|
||||||
;vswp q4, q5 ; p1:q3, p0:q5, q0:q4, q1:q6
|
|
||||||
|
|
||||||
;vp8_filter_mask() function
|
;vp8_filter_mask() function
|
||||||
;vp8_hevmask() function
|
;vp8_hevmask() function
|
||||||
sub r0, r0, r1, lsl #4
|
sub r0, r0, r1, lsl #4
|
||||||
vabd.u8 q15, q5, q4 ; abs(p0 - q0)
|
vabd.u8 q15, q5, q4 ; abs(p0 - q0)
|
||||||
vabd.u8 q14, q3, q6 ; abs(p1 - q1)
|
vabd.u8 q14, q3, q6 ; abs(p1 - q1)
|
||||||
|
|
||||||
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
||||||
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
||||||
|
vmov.u8 q0, #0x80 ; 0x80
|
||||||
|
vmov.s16 q11, #3
|
||||||
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||||
|
|
||||||
veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
|
veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
|
||||||
@ -69,80 +64,91 @@
|
|||||||
veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
|
veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
|
||||||
veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
|
veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
|
||||||
|
|
||||||
vadd.u8 q1, q1, q1 ; flimit * 2
|
|
||||||
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
|
|
||||||
vcge.u8 q15, q1, q15 ; abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
|
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 <= blimit) * -1
|
||||||
|
|
||||||
;vp8_filter() function
|
|
||||||
;;;;;;;;;;
|
|
||||||
;vqsub.s8 q2, q5, q4 ; ( qs0 - ps0)
|
|
||||||
vsubl.s8 q2, d8, d10 ; ( qs0 - ps0)
|
vsubl.s8 q2, d8, d10 ; ( qs0 - ps0)
|
||||||
vsubl.s8 q13, d9, d11
|
vsubl.s8 q13, d9, d11
|
||||||
|
|
||||||
vqsub.s8 q1, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
vqsub.s8 q14, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||||
|
|
||||||
;vmul.i8 q2, q2, q11 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
vmul.s16 q2, q2, q11 ; 3 * ( qs0 - ps0)
|
||||||
vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
|
vmul.s16 q13, q13, q11
|
||||||
vadd.s16 q14, q13, q13
|
|
||||||
vadd.s16 q2, q2, q10
|
|
||||||
vadd.s16 q13, q13, q14
|
|
||||||
|
|
||||||
;vqadd.s8 q1, q1, q2
|
vmov.u8 q11, #0x03 ; 0x03
|
||||||
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
|
vmov.u8 q12, #0x04 ; 0x04
|
||||||
vaddw.s8 q13, q13, d3
|
|
||||||
|
|
||||||
vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
vaddw.s8 q2, q2, d28 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||||
vqmovn.s16 d3, q13
|
vaddw.s8 q13, q13, d29
|
||||||
|
|
||||||
|
vqmovn.s16 d28, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||||
|
vqmovn.s16 d29, q13
|
||||||
|
|
||||||
add r0, r0, #1
|
add r0, r0, #1
|
||||||
add r2, r0, r1
|
add r3, r0, r1
|
||||||
;;;;;;;;;;;
|
|
||||||
|
|
||||||
vand q1, q1, q15 ; vp8_filter &= mask
|
vand q14, q14, q15 ; vp8_filter &= mask
|
||||||
|
|
||||||
vqadd.s8 q2, q1, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
vqadd.s8 q2, q14, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||||
vqadd.s8 q1, q1, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
vqadd.s8 q3, q14, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||||
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
vshr.s8 q14, q3, #3 ; Filter1 >>= 3
|
||||||
|
|
||||||
;calculate output
|
;calculate output
|
||||||
vqsub.s8 q10, q4, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
|
||||||
vqadd.s8 q11, q5, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
vqadd.s8 q11, q5, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
||||||
|
vqsub.s8 q10, q4, q14 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||||
|
|
||||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
|
||||||
veor q6, q11, q0 ; *op0 = u^0x80
|
veor q6, q11, q0 ; *op0 = u^0x80
|
||||||
|
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||||
add r3, r2, r1
|
add r12, r1, r1
|
||||||
vswp d13, d14
|
vswp d13, d14
|
||||||
add r12, r3, r1
|
|
||||||
|
|
||||||
;store op1, op0, oq0, oq1
|
;store op1, op0, oq0, oq1
|
||||||
vst2.8 {d12[0], d13[0]}, [r0]
|
vst2.8 {d12[0], d13[0]}, [r0], r12
|
||||||
vst2.8 {d12[1], d13[1]}, [r2]
|
vst2.8 {d12[1], d13[1]}, [r3], r12
|
||||||
vst2.8 {d12[2], d13[2]}, [r3]
|
vst2.8 {d12[2], d13[2]}, [r0], r12
|
||||||
vst2.8 {d12[3], d13[3]}, [r12], r1
|
vst2.8 {d12[3], d13[3]}, [r3], r12
|
||||||
add r0, r12, r1
|
vst2.8 {d12[4], d13[4]}, [r0], r12
|
||||||
vst2.8 {d12[4], d13[4]}, [r12]
|
vst2.8 {d12[5], d13[5]}, [r3], r12
|
||||||
vst2.8 {d12[5], d13[5]}, [r0], r1
|
vst2.8 {d12[6], d13[6]}, [r0], r12
|
||||||
add r2, r0, r1
|
vst2.8 {d12[7], d13[7]}, [r3], r12
|
||||||
vst2.8 {d12[6], d13[6]}, [r0]
|
vst2.8 {d14[0], d15[0]}, [r0], r12
|
||||||
vst2.8 {d12[7], d13[7]}, [r2], r1
|
vst2.8 {d14[1], d15[1]}, [r3], r12
|
||||||
add r3, r2, r1
|
vst2.8 {d14[2], d15[2]}, [r0], r12
|
||||||
vst2.8 {d14[0], d15[0]}, [r2]
|
vst2.8 {d14[3], d15[3]}, [r3], r12
|
||||||
vst2.8 {d14[1], d15[1]}, [r3], r1
|
vst2.8 {d14[4], d15[4]}, [r0], r12
|
||||||
add r12, r3, r1
|
vst2.8 {d14[5], d15[5]}, [r3], r12
|
||||||
vst2.8 {d14[2], d15[2]}, [r3]
|
vst2.8 {d14[6], d15[6]}, [r0], r12
|
||||||
vst2.8 {d14[3], d15[3]}, [r12], r1
|
vst2.8 {d14[7], d15[7]}, [r3]
|
||||||
add r0, r12, r1
|
|
||||||
vst2.8 {d14[4], d15[4]}, [r12]
|
|
||||||
vst2.8 {d14[5], d15[5]}, [r0], r1
|
|
||||||
add r2, r0, r1
|
|
||||||
vst2.8 {d14[6], d15[6]}, [r0]
|
|
||||||
vst2.8 {d14[7], d15[7]}, [r2]
|
|
||||||
|
|
||||||
bx lr
|
bx lr
|
||||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
|
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
|
||||||
|
|
||||||
;-----------------
|
; r0 unsigned char *y
|
||||||
|
; r1 int ystride
|
||||||
|
; r2 const unsigned char *blimit
|
||||||
|
|
||||||
|
|vp8_loop_filter_bvs_neon| PROC
|
||||||
|
push {r4, lr}
|
||||||
|
ldrb r3, [r2] ; load blim from mem
|
||||||
|
mov r4, r0
|
||||||
|
add r0, r0, #4
|
||||||
|
vdup.s8 q1, r3 ; duplicate blim
|
||||||
|
bl vp8_loop_filter_simple_vertical_edge_neon
|
||||||
|
; vp8_loop_filter_simple_vertical_edge_neon preserves r1 and q1
|
||||||
|
add r0, r4, #8
|
||||||
|
bl vp8_loop_filter_simple_vertical_edge_neon
|
||||||
|
add r0, r4, #12
|
||||||
|
pop {r4, lr}
|
||||||
|
b vp8_loop_filter_simple_vertical_edge_neon
|
||||||
|
ENDP ;|vp8_loop_filter_bvs_neon|
|
||||||
|
|
||||||
|
; r0 unsigned char *y
|
||||||
|
; r1 int ystride
|
||||||
|
; r2 const unsigned char *blimit
|
||||||
|
|
||||||
|
|vp8_loop_filter_mbvs_neon| PROC
|
||||||
|
ldrb r3, [r2] ; load mblim from mem
|
||||||
|
vdup.s8 q1, r3 ; duplicate mblim
|
||||||
|
b vp8_loop_filter_simple_vertical_edge_neon
|
||||||
|
ENDP ;|vp8_loop_filter_mbvs_neon|
|
||||||
END
|
END
|
||||||
|
@ -14,155 +14,143 @@
|
|||||||
EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
|
EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||||
EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
|
EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
|
||||||
ARM
|
ARM
|
||||||
REQUIRE8
|
|
||||||
PRESERVE8
|
|
||||||
|
|
||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
|
|
||||||
; flimit, limit, and thresh should be positive numbers.
|
|
||||||
; All 16 elements in these variables are equal.
|
|
||||||
|
|
||||||
; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
||||||
; const signed char *flimit,
|
; const unsigned char *blimit,
|
||||||
; const signed char *limit,
|
; const unsigned char *limit,
|
||||||
; const signed char *thresh,
|
; const unsigned char *thresh)
|
||||||
; int count)
|
|
||||||
; r0 unsigned char *src,
|
; r0 unsigned char *src,
|
||||||
; r1 int pitch,
|
; r1 int pitch,
|
||||||
; r2 const signed char *flimit,
|
; r2 unsigned char blimit
|
||||||
; r3 const signed char *limit,
|
; r3 unsigned char limit
|
||||||
; sp const signed char *thresh,
|
; sp unsigned char thresh,
|
||||||
; sp+4 int count (unused)
|
|
||||||
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
|
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
|
||||||
stmdb sp!, {lr}
|
push {lr}
|
||||||
sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
add r1, r1, r1 ; double stride
|
||||||
ldr r12, [sp, #4] ; load thresh pointer
|
ldr r12, [sp, #4] ; load thresh
|
||||||
|
sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
||||||
|
vdup.u8 q2, r12 ; thresh
|
||||||
|
add r12, r0, r1, lsr #1 ; move src pointer up by 1 line
|
||||||
|
|
||||||
vld1.u8 {q3}, [r0], r1 ; p3
|
vld1.u8 {q3}, [r0@128], r1 ; p3
|
||||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
vld1.u8 {q4}, [r12@128], r1 ; p2
|
||||||
vld1.u8 {q4}, [r0], r1 ; p2
|
vld1.u8 {q5}, [r0@128], r1 ; p1
|
||||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
vld1.u8 {q6}, [r12@128], r1 ; p0
|
||||||
vld1.u8 {q5}, [r0], r1 ; p1
|
vld1.u8 {q7}, [r0@128], r1 ; q0
|
||||||
vld1.u8 {q6}, [r0], r1 ; p0
|
vld1.u8 {q8}, [r12@128], r1 ; q1
|
||||||
vld1.u8 {q7}, [r0], r1 ; q0
|
vld1.u8 {q9}, [r0@128], r1 ; q2
|
||||||
vld1.u8 {q8}, [r0], r1 ; q1
|
vld1.u8 {q10}, [r12@128], r1 ; q3
|
||||||
vld1.u8 {q9}, [r0], r1 ; q2
|
|
||||||
vld1.u8 {q10}, [r0], r1 ; q3
|
|
||||||
|
|
||||||
bl vp8_mbloop_filter_neon
|
bl vp8_mbloop_filter_neon
|
||||||
|
|
||||||
sub r0, r0, r1, lsl #3
|
sub r12, r12, r1, lsl #2
|
||||||
add r0, r0, r1
|
add r0, r12, r1, lsr #1
|
||||||
add r2, r0, r1
|
|
||||||
add r3, r2, r1
|
|
||||||
|
|
||||||
vst1.u8 {q4}, [r0] ; store op2
|
vst1.u8 {q4}, [r12@128],r1 ; store op2
|
||||||
vst1.u8 {q5}, [r2] ; store op1
|
vst1.u8 {q5}, [r0@128],r1 ; store op1
|
||||||
vst1.u8 {q6}, [r3], r1 ; store op0
|
vst1.u8 {q6}, [r12@128], r1 ; store op0
|
||||||
add r12, r3, r1
|
vst1.u8 {q7}, [r0@128],r1 ; store oq0
|
||||||
vst1.u8 {q7}, [r3] ; store oq0
|
vst1.u8 {q8}, [r12@128] ; store oq1
|
||||||
vst1.u8 {q8}, [r12], r1 ; store oq1
|
vst1.u8 {q9}, [r0@128] ; store oq2
|
||||||
vst1.u8 {q9}, [r12] ; store oq2
|
|
||||||
|
|
||||||
ldmia sp!, {pc}
|
pop {pc}
|
||||||
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
|
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
|
||||||
|
|
||||||
; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
|
; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
|
||||||
; const signed char *flimit,
|
; const unsigned char *blimit,
|
||||||
; const signed char *limit,
|
; const unsigned char *limit,
|
||||||
; const signed char *thresh,
|
; const unsigned char *thresh,
|
||||||
; unsigned char *v)
|
; unsigned char *v)
|
||||||
; r0 unsigned char *u,
|
; r0 unsigned char *u,
|
||||||
; r1 int pitch,
|
; r1 int pitch,
|
||||||
; r2 const signed char *flimit,
|
; r2 unsigned char blimit
|
||||||
; r3 const signed char *limit,
|
; r3 unsigned char limit
|
||||||
; sp const signed char *thresh,
|
; sp unsigned char thresh,
|
||||||
; sp+4 unsigned char *v
|
; sp+4 unsigned char *v
|
||||||
|
|
||||||
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
|
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
|
||||||
stmdb sp!, {lr}
|
push {lr}
|
||||||
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
ldr r12, [sp, #4] ; load thresh
|
||||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||||
ldr r3, [sp, #8] ; load v ptr
|
vdup.u8 q2, r12 ; thresh
|
||||||
ldr r12, [sp, #4] ; load thresh pointer
|
ldr r12, [sp, #8] ; load v ptr
|
||||||
sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
|
sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines
|
||||||
|
|
||||||
vld1.u8 {d6}, [r0], r1 ; p3
|
vld1.u8 {d6}, [r0@64], r1 ; p3
|
||||||
vld1.u8 {d7}, [r3], r1 ; p3
|
vld1.u8 {d7}, [r12@64], r1 ; p3
|
||||||
vld1.u8 {d8}, [r0], r1 ; p2
|
vld1.u8 {d8}, [r0@64], r1 ; p2
|
||||||
vld1.u8 {d9}, [r3], r1 ; p2
|
vld1.u8 {d9}, [r12@64], r1 ; p2
|
||||||
vld1.u8 {d10}, [r0], r1 ; p1
|
vld1.u8 {d10}, [r0@64], r1 ; p1
|
||||||
vld1.u8 {d11}, [r3], r1 ; p1
|
vld1.u8 {d11}, [r12@64], r1 ; p1
|
||||||
vld1.u8 {d12}, [r0], r1 ; p0
|
vld1.u8 {d12}, [r0@64], r1 ; p0
|
||||||
vld1.u8 {d13}, [r3], r1 ; p0
|
vld1.u8 {d13}, [r12@64], r1 ; p0
|
||||||
vld1.u8 {d14}, [r0], r1 ; q0
|
vld1.u8 {d14}, [r0@64], r1 ; q0
|
||||||
vld1.u8 {d15}, [r3], r1 ; q0
|
vld1.u8 {d15}, [r12@64], r1 ; q0
|
||||||
vld1.u8 {d16}, [r0], r1 ; q1
|
vld1.u8 {d16}, [r0@64], r1 ; q1
|
||||||
vld1.u8 {d17}, [r3], r1 ; q1
|
vld1.u8 {d17}, [r12@64], r1 ; q1
|
||||||
vld1.u8 {d18}, [r0], r1 ; q2
|
vld1.u8 {d18}, [r0@64], r1 ; q2
|
||||||
vld1.u8 {d19}, [r3], r1 ; q2
|
vld1.u8 {d19}, [r12@64], r1 ; q2
|
||||||
vld1.u8 {d20}, [r0], r1 ; q3
|
vld1.u8 {d20}, [r0@64], r1 ; q3
|
||||||
vld1.u8 {d21}, [r3], r1 ; q3
|
vld1.u8 {d21}, [r12@64], r1 ; q3
|
||||||
|
|
||||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
|
||||||
|
|
||||||
bl vp8_mbloop_filter_neon
|
bl vp8_mbloop_filter_neon
|
||||||
|
|
||||||
sub r0, r0, r1, lsl #3
|
sub r0, r0, r1, lsl #3
|
||||||
sub r3, r3, r1, lsl #3
|
sub r12, r12, r1, lsl #3
|
||||||
|
|
||||||
add r0, r0, r1
|
add r0, r0, r1
|
||||||
add r3, r3, r1
|
add r12, r12, r1
|
||||||
|
|
||||||
vst1.u8 {d8}, [r0], r1 ; store u op2
|
vst1.u8 {d8}, [r0@64], r1 ; store u op2
|
||||||
vst1.u8 {d9}, [r3], r1 ; store v op2
|
vst1.u8 {d9}, [r12@64], r1 ; store v op2
|
||||||
vst1.u8 {d10}, [r0], r1 ; store u op1
|
vst1.u8 {d10}, [r0@64], r1 ; store u op1
|
||||||
vst1.u8 {d11}, [r3], r1 ; store v op1
|
vst1.u8 {d11}, [r12@64], r1 ; store v op1
|
||||||
vst1.u8 {d12}, [r0], r1 ; store u op0
|
vst1.u8 {d12}, [r0@64], r1 ; store u op0
|
||||||
vst1.u8 {d13}, [r3], r1 ; store v op0
|
vst1.u8 {d13}, [r12@64], r1 ; store v op0
|
||||||
vst1.u8 {d14}, [r0], r1 ; store u oq0
|
vst1.u8 {d14}, [r0@64], r1 ; store u oq0
|
||||||
vst1.u8 {d15}, [r3], r1 ; store v oq0
|
vst1.u8 {d15}, [r12@64], r1 ; store v oq0
|
||||||
vst1.u8 {d16}, [r0], r1 ; store u oq1
|
vst1.u8 {d16}, [r0@64], r1 ; store u oq1
|
||||||
vst1.u8 {d17}, [r3], r1 ; store v oq1
|
vst1.u8 {d17}, [r12@64], r1 ; store v oq1
|
||||||
vst1.u8 {d18}, [r0], r1 ; store u oq2
|
vst1.u8 {d18}, [r0@64], r1 ; store u oq2
|
||||||
vst1.u8 {d19}, [r3], r1 ; store v oq2
|
vst1.u8 {d19}, [r12@64], r1 ; store v oq2
|
||||||
|
|
||||||
ldmia sp!, {pc}
|
pop {pc}
|
||||||
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
|
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
|
||||||
|
|
||||||
; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||||
; const signed char *flimit,
|
; const unsigned char *blimit,
|
||||||
; const signed char *limit,
|
; const unsigned char *limit,
|
||||||
; const signed char *thresh,
|
; const unsigned char *thresh)
|
||||||
; int count)
|
|
||||||
; r0 unsigned char *src,
|
; r0 unsigned char *src,
|
||||||
; r1 int pitch,
|
; r1 int pitch,
|
||||||
; r2 const signed char *flimit,
|
; r2 unsigned char blimit
|
||||||
; r3 const signed char *limit,
|
; r3 unsigned char limit
|
||||||
; sp const signed char *thresh,
|
; sp unsigned char thresh,
|
||||||
; sp+4 int count (unused)
|
|
||||||
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
|
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
|
||||||
stmdb sp!, {lr}
|
push {lr}
|
||||||
|
ldr r12, [sp, #4] ; load thresh
|
||||||
sub r0, r0, #4 ; move src pointer down by 4 columns
|
sub r0, r0, #4 ; move src pointer down by 4 columns
|
||||||
|
vdup.s8 q2, r12 ; thresh
|
||||||
|
add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines
|
||||||
|
|
||||||
vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
|
vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
|
||||||
ldr r12, [sp, #4] ; load thresh pointer
|
vld1.u8 {d7}, [r12], r1 ; load second 8-line src data
|
||||||
vld1.u8 {d8}, [r0], r1
|
vld1.u8 {d8}, [r0], r1
|
||||||
sub sp, sp, #32
|
vld1.u8 {d9}, [r12], r1
|
||||||
vld1.u8 {d10}, [r0], r1
|
vld1.u8 {d10}, [r0], r1
|
||||||
|
vld1.u8 {d11}, [r12], r1
|
||||||
vld1.u8 {d12}, [r0], r1
|
vld1.u8 {d12}, [r0], r1
|
||||||
|
vld1.u8 {d13}, [r12], r1
|
||||||
vld1.u8 {d14}, [r0], r1
|
vld1.u8 {d14}, [r0], r1
|
||||||
|
vld1.u8 {d15}, [r12], r1
|
||||||
vld1.u8 {d16}, [r0], r1
|
vld1.u8 {d16}, [r0], r1
|
||||||
|
vld1.u8 {d17}, [r12], r1
|
||||||
vld1.u8 {d18}, [r0], r1
|
vld1.u8 {d18}, [r0], r1
|
||||||
|
vld1.u8 {d19}, [r12], r1
|
||||||
vld1.u8 {d20}, [r0], r1
|
vld1.u8 {d20}, [r0], r1
|
||||||
|
vld1.u8 {d21}, [r12], r1
|
||||||
vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
|
|
||||||
vld1.u8 {d9}, [r0], r1
|
|
||||||
vld1.u8 {d11}, [r0], r1
|
|
||||||
vld1.u8 {d13}, [r0], r1
|
|
||||||
vld1.u8 {d15}, [r0], r1
|
|
||||||
vld1.u8 {d17}, [r0], r1
|
|
||||||
vld1.u8 {d19}, [r0], r1
|
|
||||||
vld1.u8 {d21}, [r0], r1
|
|
||||||
|
|
||||||
;transpose to 8x16 matrix
|
;transpose to 8x16 matrix
|
||||||
vtrn.32 q3, q7
|
vtrn.32 q3, q7
|
||||||
@ -180,133 +168,11 @@
|
|||||||
vtrn.8 q7, q8
|
vtrn.8 q7, q8
|
||||||
vtrn.8 q9, q10
|
vtrn.8 q9, q10
|
||||||
|
|
||||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
|
||||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
|
||||||
mov r12, sp
|
|
||||||
vst1.u8 {q3}, [r12]!
|
|
||||||
vst1.u8 {q10}, [r12]!
|
|
||||||
|
|
||||||
bl vp8_mbloop_filter_neon
|
|
||||||
|
|
||||||
sub r0, r0, r1, lsl #4
|
|
||||||
|
|
||||||
add r2, r0, r1
|
|
||||||
|
|
||||||
add r3, r2, r1
|
|
||||||
|
|
||||||
vld1.u8 {q3}, [sp]!
|
|
||||||
vld1.u8 {q10}, [sp]!
|
|
||||||
|
|
||||||
;transpose to 16x8 matrix
|
|
||||||
vtrn.32 q3, q7
|
|
||||||
vtrn.32 q4, q8
|
|
||||||
vtrn.32 q5, q9
|
|
||||||
vtrn.32 q6, q10
|
|
||||||
add r12, r3, r1
|
|
||||||
|
|
||||||
vtrn.16 q3, q5
|
|
||||||
vtrn.16 q4, q6
|
|
||||||
vtrn.16 q7, q9
|
|
||||||
vtrn.16 q8, q10
|
|
||||||
|
|
||||||
vtrn.8 q3, q4
|
|
||||||
vtrn.8 q5, q6
|
|
||||||
vtrn.8 q7, q8
|
|
||||||
vtrn.8 q9, q10
|
|
||||||
|
|
||||||
;store op2, op1, op0, oq0, oq1, oq2
|
|
||||||
vst1.8 {d6}, [r0]
|
|
||||||
vst1.8 {d8}, [r2]
|
|
||||||
vst1.8 {d10}, [r3]
|
|
||||||
vst1.8 {d12}, [r12], r1
|
|
||||||
add r0, r12, r1
|
|
||||||
vst1.8 {d14}, [r12]
|
|
||||||
vst1.8 {d16}, [r0], r1
|
|
||||||
add r2, r0, r1
|
|
||||||
vst1.8 {d18}, [r0]
|
|
||||||
vst1.8 {d20}, [r2], r1
|
|
||||||
add r3, r2, r1
|
|
||||||
vst1.8 {d7}, [r2]
|
|
||||||
vst1.8 {d9}, [r3], r1
|
|
||||||
add r12, r3, r1
|
|
||||||
vst1.8 {d11}, [r3]
|
|
||||||
vst1.8 {d13}, [r12], r1
|
|
||||||
add r0, r12, r1
|
|
||||||
vst1.8 {d15}, [r12]
|
|
||||||
vst1.8 {d17}, [r0], r1
|
|
||||||
add r2, r0, r1
|
|
||||||
vst1.8 {d19}, [r0]
|
|
||||||
vst1.8 {d21}, [r2]
|
|
||||||
|
|
||||||
ldmia sp!, {pc}
|
|
||||||
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
|
|
||||||
|
|
||||||
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
|
||||||
; const signed char *flimit,
|
|
||||||
; const signed char *limit,
|
|
||||||
; const signed char *thresh,
|
|
||||||
; unsigned char *v)
|
|
||||||
; r0 unsigned char *u,
|
|
||||||
; r1 int pitch,
|
|
||||||
; r2 const signed char *flimit,
|
|
||||||
; r3 const signed char *limit,
|
|
||||||
; sp const signed char *thresh,
|
|
||||||
; sp+4 unsigned char *v
|
|
||||||
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
|
|
||||||
stmdb sp!, {lr}
|
|
||||||
sub r0, r0, #4 ; move src pointer down by 4 columns
|
|
||||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
|
||||||
ldr r3, [sp, #8] ; load v ptr
|
|
||||||
ldr r12, [sp, #4] ; load thresh pointer
|
|
||||||
|
|
||||||
sub r3, r3, #4 ; move v pointer down by 4 columns
|
|
||||||
|
|
||||||
vld1.u8 {d6}, [r0], r1 ;load u data
|
|
||||||
vld1.u8 {d7}, [r3], r1 ;load v data
|
|
||||||
vld1.u8 {d8}, [r0], r1
|
|
||||||
vld1.u8 {d9}, [r3], r1
|
|
||||||
vld1.u8 {d10}, [r0], r1
|
|
||||||
vld1.u8 {d11}, [r3], r1
|
|
||||||
vld1.u8 {d12}, [r0], r1
|
|
||||||
vld1.u8 {d13}, [r3], r1
|
|
||||||
vld1.u8 {d14}, [r0], r1
|
|
||||||
vld1.u8 {d15}, [r3], r1
|
|
||||||
vld1.u8 {d16}, [r0], r1
|
|
||||||
vld1.u8 {d17}, [r3], r1
|
|
||||||
vld1.u8 {d18}, [r0], r1
|
|
||||||
vld1.u8 {d19}, [r3], r1
|
|
||||||
vld1.u8 {d20}, [r0], r1
|
|
||||||
vld1.u8 {d21}, [r3], r1
|
|
||||||
|
|
||||||
;transpose to 8x16 matrix
|
|
||||||
vtrn.32 q3, q7
|
|
||||||
vtrn.32 q4, q8
|
|
||||||
vtrn.32 q5, q9
|
|
||||||
vtrn.32 q6, q10
|
|
||||||
|
|
||||||
vtrn.16 q3, q5
|
|
||||||
vtrn.16 q4, q6
|
|
||||||
vtrn.16 q7, q9
|
|
||||||
vtrn.16 q8, q10
|
|
||||||
|
|
||||||
vtrn.8 q3, q4
|
|
||||||
vtrn.8 q5, q6
|
|
||||||
vtrn.8 q7, q8
|
|
||||||
vtrn.8 q9, q10
|
|
||||||
|
|
||||||
sub sp, sp, #32
|
|
||||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
|
||||||
mov r12, sp
|
|
||||||
vst1.u8 {q3}, [r12]!
|
|
||||||
vst1.u8 {q10}, [r12]!
|
|
||||||
|
|
||||||
bl vp8_mbloop_filter_neon
|
|
||||||
|
|
||||||
sub r0, r0, r1, lsl #3
|
sub r0, r0, r1, lsl #3
|
||||||
sub r3, r3, r1, lsl #3
|
|
||||||
|
|
||||||
vld1.u8 {q3}, [sp]!
|
bl vp8_mbloop_filter_neon
|
||||||
vld1.u8 {q10}, [sp]!
|
|
||||||
|
sub r12, r12, r1, lsl #3
|
||||||
|
|
||||||
;transpose to 16x8 matrix
|
;transpose to 16x8 matrix
|
||||||
vtrn.32 q3, q7
|
vtrn.32 q3, q7
|
||||||
@ -326,23 +192,118 @@
|
|||||||
|
|
||||||
;store op2, op1, op0, oq0, oq1, oq2
|
;store op2, op1, op0, oq0, oq1, oq2
|
||||||
vst1.8 {d6}, [r0], r1
|
vst1.8 {d6}, [r0], r1
|
||||||
vst1.8 {d7}, [r3], r1
|
vst1.8 {d7}, [r12], r1
|
||||||
vst1.8 {d8}, [r0], r1
|
vst1.8 {d8}, [r0], r1
|
||||||
vst1.8 {d9}, [r3], r1
|
vst1.8 {d9}, [r12], r1
|
||||||
vst1.8 {d10}, [r0], r1
|
vst1.8 {d10}, [r0], r1
|
||||||
vst1.8 {d11}, [r3], r1
|
vst1.8 {d11}, [r12], r1
|
||||||
vst1.8 {d12}, [r0], r1
|
vst1.8 {d12}, [r0], r1
|
||||||
vst1.8 {d13}, [r3], r1
|
vst1.8 {d13}, [r12], r1
|
||||||
vst1.8 {d14}, [r0], r1
|
vst1.8 {d14}, [r0], r1
|
||||||
vst1.8 {d15}, [r3], r1
|
vst1.8 {d15}, [r12], r1
|
||||||
vst1.8 {d16}, [r0], r1
|
vst1.8 {d16}, [r0], r1
|
||||||
vst1.8 {d17}, [r3], r1
|
vst1.8 {d17}, [r12], r1
|
||||||
vst1.8 {d18}, [r0], r1
|
vst1.8 {d18}, [r0], r1
|
||||||
vst1.8 {d19}, [r3], r1
|
vst1.8 {d19}, [r12], r1
|
||||||
vst1.8 {d20}, [r0], r1
|
vst1.8 {d20}, [r0]
|
||||||
vst1.8 {d21}, [r3], r1
|
vst1.8 {d21}, [r12]
|
||||||
|
|
||||||
ldmia sp!, {pc}
|
pop {pc}
|
||||||
|
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||||
|
|
||||||
|
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
||||||
|
; const unsigned char *blimit,
|
||||||
|
; const unsigned char *limit,
|
||||||
|
; const unsigned char *thresh,
|
||||||
|
; unsigned char *v)
|
||||||
|
; r0 unsigned char *u,
|
||||||
|
; r1 int pitch,
|
||||||
|
; r2 unsigned char blimit
|
||||||
|
; r3 unsigned char limit
|
||||||
|
; sp unsigned char thresh,
|
||||||
|
; sp+4 unsigned char *v
|
||||||
|
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
|
||||||
|
push {lr}
|
||||||
|
ldr r12, [sp, #4] ; load thresh
|
||||||
|
sub r0, r0, #4 ; move u pointer down by 4 columns
|
||||||
|
vdup.u8 q2, r12 ; thresh
|
||||||
|
ldr r12, [sp, #8] ; load v ptr
|
||||||
|
sub r12, r12, #4 ; move v pointer down by 4 columns
|
||||||
|
|
||||||
|
vld1.u8 {d6}, [r0], r1 ;load u data
|
||||||
|
vld1.u8 {d7}, [r12], r1 ;load v data
|
||||||
|
vld1.u8 {d8}, [r0], r1
|
||||||
|
vld1.u8 {d9}, [r12], r1
|
||||||
|
vld1.u8 {d10}, [r0], r1
|
||||||
|
vld1.u8 {d11}, [r12], r1
|
||||||
|
vld1.u8 {d12}, [r0], r1
|
||||||
|
vld1.u8 {d13}, [r12], r1
|
||||||
|
vld1.u8 {d14}, [r0], r1
|
||||||
|
vld1.u8 {d15}, [r12], r1
|
||||||
|
vld1.u8 {d16}, [r0], r1
|
||||||
|
vld1.u8 {d17}, [r12], r1
|
||||||
|
vld1.u8 {d18}, [r0], r1
|
||||||
|
vld1.u8 {d19}, [r12], r1
|
||||||
|
vld1.u8 {d20}, [r0], r1
|
||||||
|
vld1.u8 {d21}, [r12], r1
|
||||||
|
|
||||||
|
;transpose to 8x16 matrix
|
||||||
|
vtrn.32 q3, q7
|
||||||
|
vtrn.32 q4, q8
|
||||||
|
vtrn.32 q5, q9
|
||||||
|
vtrn.32 q6, q10
|
||||||
|
|
||||||
|
vtrn.16 q3, q5
|
||||||
|
vtrn.16 q4, q6
|
||||||
|
vtrn.16 q7, q9
|
||||||
|
vtrn.16 q8, q10
|
||||||
|
|
||||||
|
vtrn.8 q3, q4
|
||||||
|
vtrn.8 q5, q6
|
||||||
|
vtrn.8 q7, q8
|
||||||
|
vtrn.8 q9, q10
|
||||||
|
|
||||||
|
sub r0, r0, r1, lsl #3
|
||||||
|
|
||||||
|
bl vp8_mbloop_filter_neon
|
||||||
|
|
||||||
|
sub r12, r12, r1, lsl #3
|
||||||
|
|
||||||
|
;transpose to 16x8 matrix
|
||||||
|
vtrn.32 q3, q7
|
||||||
|
vtrn.32 q4, q8
|
||||||
|
vtrn.32 q5, q9
|
||||||
|
vtrn.32 q6, q10
|
||||||
|
|
||||||
|
vtrn.16 q3, q5
|
||||||
|
vtrn.16 q4, q6
|
||||||
|
vtrn.16 q7, q9
|
||||||
|
vtrn.16 q8, q10
|
||||||
|
|
||||||
|
vtrn.8 q3, q4
|
||||||
|
vtrn.8 q5, q6
|
||||||
|
vtrn.8 q7, q8
|
||||||
|
vtrn.8 q9, q10
|
||||||
|
|
||||||
|
;store op2, op1, op0, oq0, oq1, oq2
|
||||||
|
vst1.8 {d6}, [r0], r1
|
||||||
|
vst1.8 {d7}, [r12], r1
|
||||||
|
vst1.8 {d8}, [r0], r1
|
||||||
|
vst1.8 {d9}, [r12], r1
|
||||||
|
vst1.8 {d10}, [r0], r1
|
||||||
|
vst1.8 {d11}, [r12], r1
|
||||||
|
vst1.8 {d12}, [r0], r1
|
||||||
|
vst1.8 {d13}, [r12], r1
|
||||||
|
vst1.8 {d14}, [r0], r1
|
||||||
|
vst1.8 {d15}, [r12], r1
|
||||||
|
vst1.8 {d16}, [r0], r1
|
||||||
|
vst1.8 {d17}, [r12], r1
|
||||||
|
vst1.8 {d18}, [r0], r1
|
||||||
|
vst1.8 {d19}, [r12], r1
|
||||||
|
vst1.8 {d20}, [r0]
|
||||||
|
vst1.8 {d21}, [r12]
|
||||||
|
|
||||||
|
pop {pc}
|
||||||
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
|
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
|
||||||
|
|
||||||
; void vp8_mbloop_filter_neon()
|
; void vp8_mbloop_filter_neon()
|
||||||
@ -350,26 +311,19 @@
|
|||||||
; functions do the necessary load, transpose (if necessary), preserve (if
|
; functions do the necessary load, transpose (if necessary), preserve (if
|
||||||
; necessary) and store.
|
; necessary) and store.
|
||||||
|
|
||||||
; TODO:
|
|
||||||
; The vertical filter writes p3/q3 back out because two 4 element writes are
|
|
||||||
; much simpler than ordering and writing two 3 element sets (or three 2 elements
|
|
||||||
; sets, or whichever other combinations are possible).
|
|
||||||
; If we can preserve q3 and q10, the vertical filter will be able to avoid
|
|
||||||
; storing those values on the stack and reading them back after the filter.
|
|
||||||
|
|
||||||
; r0,r1 PRESERVE
|
; r0,r1 PRESERVE
|
||||||
; r2 flimit
|
; r2 mblimit
|
||||||
; r3 PRESERVE
|
; r3 limit
|
||||||
; q1 limit
|
|
||||||
; q2 thresh
|
; q2 thresh
|
||||||
; q3 p3
|
; q3 p3 PRESERVE
|
||||||
; q4 p2
|
; q4 p2
|
||||||
; q5 p1
|
; q5 p1
|
||||||
; q6 p0
|
; q6 p0
|
||||||
; q7 q0
|
; q7 q0
|
||||||
; q8 q1
|
; q8 q1
|
||||||
; q9 q2
|
; q9 q2
|
||||||
; q10 q3
|
; q10 q3 PRESERVE
|
||||||
|
|
||||||
|vp8_mbloop_filter_neon| PROC
|
|vp8_mbloop_filter_neon| PROC
|
||||||
|
|
||||||
@ -378,12 +332,12 @@
|
|||||||
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
|
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
|
||||||
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
|
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
|
||||||
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
||||||
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
vabd.u8 q1, q9, q8 ; abs(q2 - q1)
|
||||||
vabd.u8 q0, q10, q9 ; abs(q3 - q2)
|
vabd.u8 q0, q10, q9 ; abs(q3 - q2)
|
||||||
|
|
||||||
vmax.u8 q11, q11, q12
|
vmax.u8 q11, q11, q12
|
||||||
vmax.u8 q12, q13, q14
|
vmax.u8 q12, q13, q14
|
||||||
vmax.u8 q3, q3, q0
|
vmax.u8 q1, q1, q0
|
||||||
vmax.u8 q15, q11, q12
|
vmax.u8 q15, q11, q12
|
||||||
|
|
||||||
vabd.u8 q12, q6, q7 ; abs(p0 - q0)
|
vabd.u8 q12, q6, q7 ; abs(p0 - q0)
|
||||||
@ -391,44 +345,46 @@
|
|||||||
; vp8_hevmask
|
; vp8_hevmask
|
||||||
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
|
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
|
||||||
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
|
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
|
||||||
vmax.u8 q15, q15, q3
|
vmax.u8 q15, q15, q1
|
||||||
|
|
||||||
vld1.s8 {d4[], d5[]}, [r2] ; flimit
|
vdup.u8 q1, r3 ; limit
|
||||||
|
vdup.u8 q2, r2 ; mblimit
|
||||||
|
|
||||||
vmov.u8 q0, #0x80 ; 0x80
|
vmov.u8 q0, #0x80 ; 0x80
|
||||||
|
|
||||||
vadd.u8 q2, q2, q2 ; flimit * 2
|
|
||||||
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
|
|
||||||
vcge.u8 q15, q1, q15
|
vcge.u8 q15, q1, q15
|
||||||
|
|
||||||
vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
|
vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
|
||||||
vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
|
vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
|
||||||
vshr.u8 q1, q1, #1 ; a = a / 2
|
vmov.u16 q11, #3 ; #3
|
||||||
vqadd.u8 q12, q12, q1 ; a = b + a
|
|
||||||
vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
|
|
||||||
|
|
||||||
; vp8_filter
|
; vp8_filter
|
||||||
; convert to signed
|
; convert to signed
|
||||||
veor q7, q7, q0 ; qs0
|
veor q7, q7, q0 ; qs0
|
||||||
|
vshr.u8 q1, q1, #1 ; a = a / 2
|
||||||
veor q6, q6, q0 ; ps0
|
veor q6, q6, q0 ; ps0
|
||||||
veor q5, q5, q0 ; ps1
|
veor q5, q5, q0 ; ps1
|
||||||
|
|
||||||
|
vqadd.u8 q12, q12, q1 ; a = b + a
|
||||||
|
|
||||||
veor q8, q8, q0 ; qs1
|
veor q8, q8, q0 ; qs1
|
||||||
veor q4, q4, q0 ; ps2
|
veor q4, q4, q0 ; ps2
|
||||||
veor q9, q9, q0 ; qs2
|
veor q9, q9, q0 ; qs2
|
||||||
|
|
||||||
vorr q14, q13, q14 ; vp8_hevmask
|
vorr q14, q13, q14 ; vp8_hevmask
|
||||||
|
|
||||||
|
vcge.u8 q12, q2, q12 ; (a <= mblimit) * -1
|
||||||
|
|
||||||
vsubl.s8 q2, d14, d12 ; qs0 - ps0
|
vsubl.s8 q2, d14, d12 ; qs0 - ps0
|
||||||
vsubl.s8 q13, d15, d13
|
vsubl.s8 q13, d15, d13
|
||||||
|
|
||||||
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
||||||
|
|
||||||
vadd.s16 q10, q2, q2 ; 3 * (qs0 - ps0)
|
vmul.i16 q2, q2, q11 ; 3 * ( qs0 - ps0)
|
||||||
vadd.s16 q11, q13, q13
|
|
||||||
vand q15, q15, q12 ; vp8_filter_mask
|
vand q15, q15, q12 ; vp8_filter_mask
|
||||||
|
|
||||||
vadd.s16 q2, q2, q10
|
vmul.i16 q13, q13, q11
|
||||||
vadd.s16 q13, q13, q11
|
|
||||||
|
|
||||||
vmov.u8 q12, #3 ; #3
|
vmov.u8 q12, #3 ; #3
|
||||||
|
|
||||||
@ -447,23 +403,19 @@
|
|||||||
|
|
||||||
vand q13, q1, q14 ; Filter2 &= hev
|
vand q13, q1, q14 ; Filter2 &= hev
|
||||||
|
|
||||||
vmov.u8 d7, #9 ; #9
|
|
||||||
|
|
||||||
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
|
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
|
||||||
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
|
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
|
||||||
|
|
||||||
vmov.u8 d6, #18 ; #18
|
vmov q0, q15
|
||||||
|
|
||||||
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
|
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
|
||||||
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
|
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
|
||||||
|
|
||||||
vmov q10, q15
|
vmov q11, q15
|
||||||
vmov q12, q15
|
vmov q12, q15
|
||||||
|
|
||||||
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
|
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
|
||||||
|
|
||||||
vmov.u8 d5, #27 ; #27
|
|
||||||
|
|
||||||
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
|
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
|
||||||
|
|
||||||
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
||||||
@ -471,35 +423,43 @@
|
|||||||
; roughly 1/7th difference across boundary
|
; roughly 1/7th difference across boundary
|
||||||
; roughly 2/7th difference across boundary
|
; roughly 2/7th difference across boundary
|
||||||
; roughly 3/7th difference across boundary
|
; roughly 3/7th difference across boundary
|
||||||
vmov q11, q15
|
|
||||||
|
vmov.u8 d5, #9 ; #9
|
||||||
|
vmov.u8 d4, #18 ; #18
|
||||||
|
|
||||||
vmov q13, q15
|
vmov q13, q15
|
||||||
vmov q14, q15
|
vmov q14, q15
|
||||||
|
|
||||||
vmlal.s8 q10, d2, d7 ; Filter2 * 9
|
vmlal.s8 q0, d2, d5 ; 63 + Filter2 * 9
|
||||||
vmlal.s8 q11, d3, d7
|
vmlal.s8 q11, d3, d5
|
||||||
vmlal.s8 q12, d2, d6 ; Filter2 * 18
|
vmov.u8 d5, #27 ; #27
|
||||||
vmlal.s8 q13, d3, d6
|
vmlal.s8 q12, d2, d4 ; 63 + Filter2 * 18
|
||||||
vmlal.s8 q14, d2, d5 ; Filter2 * 27
|
vmlal.s8 q13, d3, d4
|
||||||
|
vmlal.s8 q14, d2, d5 ; 63 + Filter2 * 27
|
||||||
vmlal.s8 q15, d3, d5
|
vmlal.s8 q15, d3, d5
|
||||||
vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7)
|
|
||||||
vqshrn.s16 d21, q11, #7
|
vqshrn.s16 d0, q0, #7 ; u = clamp((63 + Filter2 * 9)>>7)
|
||||||
|
vqshrn.s16 d1, q11, #7
|
||||||
vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
|
vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
|
||||||
vqshrn.s16 d25, q13, #7
|
vqshrn.s16 d25, q13, #7
|
||||||
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
|
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
|
||||||
vqshrn.s16 d29, q15, #7
|
vqshrn.s16 d29, q15, #7
|
||||||
|
|
||||||
vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u)
|
vmov.u8 q1, #0x80 ; 0x80
|
||||||
vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u)
|
|
||||||
|
vqsub.s8 q11, q9, q0 ; s = clamp(qs2 - u)
|
||||||
|
vqadd.s8 q0, q4, q0 ; s = clamp(ps2 + u)
|
||||||
vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
|
vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
|
||||||
vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
|
vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
|
||||||
vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
|
vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
|
||||||
vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
|
vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
|
||||||
veor q9, q11, q0 ; *oq2 = s^0x80
|
|
||||||
veor q4, q10, q0 ; *op2 = s^0x80
|
veor q9, q11, q1 ; *oq2 = s^0x80
|
||||||
veor q8, q13, q0 ; *oq1 = s^0x80
|
veor q4, q0, q1 ; *op2 = s^0x80
|
||||||
veor q5, q12, q0 ; *op1 = s^0x80
|
veor q8, q13, q1 ; *oq1 = s^0x80
|
||||||
veor q7, q15, q0 ; *oq0 = s^0x80
|
veor q5, q12, q1 ; *op1 = s^0x80
|
||||||
veor q6, q14, q0 ; *op0 = s^0x80
|
veor q7, q15, q1 ; *oq0 = s^0x80
|
||||||
|
veor q6, q14, q1 ; *op0 = s^0x80
|
||||||
|
|
||||||
bx lr
|
bx lr
|
||||||
ENDP ; |vp8_mbloop_filter_neon|
|
ENDP ; |vp8_mbloop_filter_neon|
|
||||||
|
@ -108,9 +108,9 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c;
|
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c;
|
||||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
|
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
|
||||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c;
|
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c;
|
||||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
|
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_c;
|
||||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c;
|
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c;
|
||||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
|
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_c;
|
||||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
|
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
|
||||||
|
|
||||||
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_INTERNAL_STATS)
|
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_INTERNAL_STATS)
|
||||||
|
@ -9,152 +9,149 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "vpx_ports/config.h"
|
#include "vpx_config.h"
|
||||||
#include "loopfilter.h"
|
#include "loopfilter.h"
|
||||||
#include "onyxc_int.h"
|
#include "onyxc_int.h"
|
||||||
|
#include "vpx_mem/vpx_mem.h"
|
||||||
|
|
||||||
typedef unsigned char uc;
|
typedef unsigned char uc;
|
||||||
|
|
||||||
|
|
||||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
|
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
|
||||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
|
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
|
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
||||||
|
prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
||||||
|
|
||||||
/* Horizontal MB filtering */
|
/* Horizontal MB filtering */
|
||||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||||
|
loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Vertical MB Filtering */
|
/* Vertical MB Filtering */
|
||||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||||
|
loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Horizontal B Filtering */
|
/* Horizontal B Filtering */
|
||||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||||
|
loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
const unsigned char *blimit)
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Vertical B Filtering */
|
/* Vertical B Filtering */
|
||||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||||
|
loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
const unsigned char *blimit)
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_init_loop_filter(VP8_COMMON *cm)
|
static void lf_init_lut(loop_filter_info_n *lfi)
|
||||||
{
|
{
|
||||||
loop_filter_info *lfi = cm->lf_info;
|
int filt_lvl;
|
||||||
LOOPFILTERTYPE lft = cm->filter_type;
|
|
||||||
int sharpness_lvl = cm->sharpness_level;
|
|
||||||
int frame_type = cm->frame_type;
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
int block_inside_limit = 0;
|
for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++)
|
||||||
int HEVThresh;
|
|
||||||
|
|
||||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
|
||||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
|
||||||
{
|
{
|
||||||
int filt_lvl = i;
|
if (filt_lvl >= 40)
|
||||||
|
|
||||||
if (frame_type == KEY_FRAME)
|
|
||||||
{
|
{
|
||||||
if (filt_lvl >= 40)
|
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
|
||||||
HEVThresh = 2;
|
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
|
||||||
else if (filt_lvl >= 15)
|
}
|
||||||
HEVThresh = 1;
|
else if (filt_lvl >= 20)
|
||||||
else
|
{
|
||||||
HEVThresh = 0;
|
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
|
||||||
|
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
|
||||||
|
}
|
||||||
|
else if (filt_lvl >= 15)
|
||||||
|
{
|
||||||
|
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
|
||||||
|
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (filt_lvl >= 40)
|
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
|
||||||
HEVThresh = 3;
|
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
|
||||||
else if (filt_lvl >= 20)
|
|
||||||
HEVThresh = 2;
|
|
||||||
else if (filt_lvl >= 15)
|
|
||||||
HEVThresh = 1;
|
|
||||||
else
|
|
||||||
HEVThresh = 0;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lfi->mode_lf_lut[DC_PRED] = 1;
|
||||||
|
lfi->mode_lf_lut[V_PRED] = 1;
|
||||||
|
lfi->mode_lf_lut[H_PRED] = 1;
|
||||||
|
lfi->mode_lf_lut[TM_PRED] = 1;
|
||||||
|
lfi->mode_lf_lut[B_PRED] = 0;
|
||||||
|
|
||||||
|
lfi->mode_lf_lut[ZEROMV] = 1;
|
||||||
|
lfi->mode_lf_lut[NEARESTMV] = 2;
|
||||||
|
lfi->mode_lf_lut[NEARMV] = 2;
|
||||||
|
lfi->mode_lf_lut[NEWMV] = 2;
|
||||||
|
lfi->mode_lf_lut[SPLITMV] = 3;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||||
|
int sharpness_lvl)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* For each possible value for the loop filter fill out limits */
|
||||||
|
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||||
|
{
|
||||||
|
int filt_lvl = i;
|
||||||
|
int block_inside_limit = 0;
|
||||||
|
|
||||||
/* Set loop filter paramaeters that control sharpness. */
|
/* Set loop filter paramaeters that control sharpness. */
|
||||||
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
|
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
|
||||||
@ -169,119 +166,120 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
|||||||
if (block_inside_limit < 1)
|
if (block_inside_limit < 1)
|
||||||
block_inside_limit = 1;
|
block_inside_limit = 1;
|
||||||
|
|
||||||
for (j = 0; j < 16; j++)
|
vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
|
||||||
{
|
vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
|
||||||
lfi[i].lim[j] = block_inside_limit;
|
SIMD_WIDTH);
|
||||||
lfi[i].mbflim[j] = filt_lvl + 2;
|
vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
|
||||||
lfi[i].flim[j] = filt_lvl;
|
SIMD_WIDTH);
|
||||||
lfi[i].thr[j] = HEVThresh;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Set up the function pointers depending on the type of loop filtering selected */
|
|
||||||
if (lft == NORMAL_LOOPFILTER)
|
|
||||||
{
|
|
||||||
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
|
|
||||||
cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v);
|
|
||||||
cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h);
|
|
||||||
cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v);
|
|
||||||
cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v);
|
|
||||||
cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h);
|
|
||||||
cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
|
void vp8_loop_filter_init(VP8_COMMON *cm)
|
||||||
* each frame. Check last_frame_type to skip the function most of times.
|
|
||||||
*/
|
|
||||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
|
|
||||||
{
|
{
|
||||||
int HEVThresh;
|
loop_filter_info_n *lfi = &cm->lf_info;
|
||||||
int i, j;
|
int i;
|
||||||
|
|
||||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
/* init limits for given sharpness*/
|
||||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
|
||||||
|
cm->last_sharpness_level = cm->sharpness_level;
|
||||||
|
|
||||||
|
/* init LUT for lvl and hev thr picking */
|
||||||
|
lf_init_lut(lfi);
|
||||||
|
|
||||||
|
/* init hev threshold const vectors */
|
||||||
|
for(i = 0; i < 4 ; i++)
|
||||||
{
|
{
|
||||||
int filt_lvl = i;
|
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
|
||||||
|
|
||||||
if (frame_type == KEY_FRAME)
|
|
||||||
{
|
|
||||||
if (filt_lvl >= 40)
|
|
||||||
HEVThresh = 2;
|
|
||||||
else if (filt_lvl >= 15)
|
|
||||||
HEVThresh = 1;
|
|
||||||
else
|
|
||||||
HEVThresh = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (filt_lvl >= 40)
|
|
||||||
HEVThresh = 3;
|
|
||||||
else if (filt_lvl >= 20)
|
|
||||||
HEVThresh = 2;
|
|
||||||
else if (filt_lvl >= 15)
|
|
||||||
HEVThresh = 1;
|
|
||||||
else
|
|
||||||
HEVThresh = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (j = 0; j < 16; j++)
|
|
||||||
{
|
|
||||||
/*lfi[i].lim[j] = block_inside_limit;
|
|
||||||
lfi[i].mbflim[j] = filt_lvl+2;*/
|
|
||||||
/*lfi[i].flim[j] = filt_lvl;*/
|
|
||||||
lfi[i].thr[j] = HEVThresh;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void vp8_loop_filter_frame_init(VP8_COMMON *cm,
|
||||||
int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level)
|
MACROBLOCKD *mbd,
|
||||||
|
int default_filt_lvl,
|
||||||
|
int sharpness_lvl)
|
||||||
{
|
{
|
||||||
MB_MODE_INFO *mbmi = &mbd->mode_info_context->mbmi;
|
int seg, /* segment number */
|
||||||
|
ref, /* index in ref_lf_deltas */
|
||||||
|
mode; /* index in mode_lf_deltas */
|
||||||
|
|
||||||
if (mbd->mode_ref_lf_delta_enabled)
|
loop_filter_info_n *lfi = &cm->lf_info;
|
||||||
|
|
||||||
|
/* update limits if sharpness has changed */
|
||||||
|
if(cm->last_sharpness_level != sharpness_lvl)
|
||||||
{
|
{
|
||||||
|
vp8_loop_filter_update_sharpness(lfi, sharpness_lvl);
|
||||||
|
cm->last_sharpness_level = sharpness_lvl;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(seg = 0; seg < MAX_MB_SEGMENTS; seg++)
|
||||||
|
{
|
||||||
|
int lvl_seg = default_filt_lvl;
|
||||||
|
int lvl_ref, lvl_mode;
|
||||||
|
|
||||||
|
/* Note the baseline filter values for each segment */
|
||||||
|
if (mbd->segmentation_enabled)
|
||||||
|
{
|
||||||
|
/* Abs value */
|
||||||
|
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||||
|
{
|
||||||
|
lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||||
|
}
|
||||||
|
else /* Delta Value */
|
||||||
|
{
|
||||||
|
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||||
|
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!mbd->mode_ref_lf_delta_enabled)
|
||||||
|
{
|
||||||
|
/* we could get rid of this if we assume that deltas are set to
|
||||||
|
* zero when not in use; encoder always uses deltas
|
||||||
|
*/
|
||||||
|
vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
lvl_ref = lvl_seg;
|
||||||
|
|
||||||
|
/* INTRA_FRAME */
|
||||||
|
ref = INTRA_FRAME;
|
||||||
|
|
||||||
/* Apply delta for reference frame */
|
/* Apply delta for reference frame */
|
||||||
filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
|
lvl_ref += mbd->ref_lf_deltas[ref];
|
||||||
|
|
||||||
/* Apply delta for mode */
|
/* Apply delta for Intra modes */
|
||||||
if (mbmi->ref_frame == INTRA_FRAME)
|
mode = 0; /* B_PRED */
|
||||||
|
/* Only the split mode BPRED has a further special case */
|
||||||
|
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
|
||||||
|
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
|
||||||
|
|
||||||
|
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||||
|
|
||||||
|
mode = 1; /* all the rest of Intra modes */
|
||||||
|
lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; /* clamp */
|
||||||
|
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||||
|
|
||||||
|
/* LAST, GOLDEN, ALT */
|
||||||
|
for(ref = 1; ref < MAX_REF_FRAMES; ref++)
|
||||||
{
|
{
|
||||||
/* Only the split mode BPRED has a further special case */
|
int lvl_ref = lvl_seg;
|
||||||
if (mbmi->mode == B_PRED)
|
|
||||||
filter_level += mbd->mode_lf_deltas[0];
|
/* Apply delta for reference frame */
|
||||||
|
lvl_ref += mbd->ref_lf_deltas[ref];
|
||||||
|
|
||||||
|
/* Apply delta for Inter modes */
|
||||||
|
for (mode = 1; mode < 4; mode++)
|
||||||
|
{
|
||||||
|
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
|
||||||
|
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
|
||||||
|
|
||||||
|
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Zero motion mode */
|
|
||||||
if (mbmi->mode == ZEROMV)
|
|
||||||
filter_level += mbd->mode_lf_deltas[1];
|
|
||||||
|
|
||||||
/* Split MB motion mode */
|
|
||||||
else if (mbmi->mode == SPLITMV)
|
|
||||||
filter_level += mbd->mode_lf_deltas[3];
|
|
||||||
|
|
||||||
/* All other inter motion modes (Nearest, Near, New) */
|
|
||||||
else
|
|
||||||
filter_level += mbd->mode_lf_deltas[2];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Range check */
|
|
||||||
if (filter_level > MAX_LOOP_FILTER)
|
|
||||||
filter_level = MAX_LOOP_FILTER;
|
|
||||||
else if (filter_level < 0)
|
|
||||||
filter_level = 0;
|
|
||||||
}
|
}
|
||||||
return filter_level;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_frame
|
void vp8_loop_filter_frame
|
||||||
(
|
(
|
||||||
VP8_COMMON *cm,
|
VP8_COMMON *cm,
|
||||||
@ -290,49 +288,23 @@ void vp8_loop_filter_frame
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||||
loop_filter_info *lfi = cm->lf_info;
|
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||||
|
loop_filter_info lfi;
|
||||||
|
|
||||||
FRAME_TYPE frame_type = cm->frame_type;
|
FRAME_TYPE frame_type = cm->frame_type;
|
||||||
|
|
||||||
int mb_row;
|
int mb_row;
|
||||||
int mb_col;
|
int mb_col;
|
||||||
|
|
||||||
|
|
||||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
|
||||||
int filter_level;
|
int filter_level;
|
||||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
|
||||||
|
|
||||||
int i;
|
|
||||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||||
|
|
||||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
/* Point at base of Mb MODE_INFO list */
|
||||||
|
const MODE_INFO *mode_info_context = cm->mi;
|
||||||
/* Note the baseline filter values for each segment */
|
|
||||||
if (alt_flt_enabled)
|
|
||||||
{
|
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
|
||||||
{
|
|
||||||
/* Abs value */
|
|
||||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
|
||||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
|
||||||
/* Delta Value */
|
|
||||||
else
|
|
||||||
{
|
|
||||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
|
||||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
|
||||||
baseline_filter_level[i] = default_filt_lvl;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize the loop filter for this frame. */
|
/* Initialize the loop filter for this frame. */
|
||||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl, cm->sharpness_level);
|
||||||
vp8_init_loop_filter(cm);
|
|
||||||
else if (frame_type != cm->last_frame_type)
|
|
||||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
|
||||||
|
|
||||||
/* Set up the buffer pointers */
|
/* Set up the buffer pointers */
|
||||||
y_ptr = post->y_buffer;
|
y_ptr = post->y_buffer;
|
||||||
@ -344,51 +316,79 @@ void vp8_loop_filter_frame
|
|||||||
{
|
{
|
||||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||||
{
|
{
|
||||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||||
int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
|
mode_info_context->mbmi.mode != SPLITMV &&
|
||||||
mbd->mode_info_context->mbmi.mode != SPLITMV &&
|
mode_info_context->mbmi.mb_skip_coeff);
|
||||||
mbd->mode_info_context->mbmi.mb_skip_coeff);
|
|
||||||
|
|
||||||
filter_level = baseline_filter_level[Segment];
|
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||||
|
const int seg = mode_info_context->mbmi.segment_id;
|
||||||
|
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||||
|
|
||||||
/* Distance of Mb to the various image edges.
|
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||||
* These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
|
||||||
* Apply any context driven MB level adjustment
|
|
||||||
*/
|
|
||||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
|
||||||
|
|
||||||
if (filter_level)
|
if (filter_level)
|
||||||
{
|
{
|
||||||
if (mb_col > 0)
|
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||||
cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
|
{
|
||||||
|
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||||
|
lfi.mblim = lfi_n->mblim[filter_level];
|
||||||
|
lfi.blim = lfi_n->blim[filter_level];
|
||||||
|
lfi.lim = lfi_n->lim[filter_level];
|
||||||
|
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||||
|
|
||||||
if (!skip_lf)
|
if (mb_col > 0)
|
||||||
cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||||
|
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||||
|
|
||||||
/* don't apply across umv border */
|
if (!skip_lf)
|
||||||
if (mb_row > 0)
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||||
cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
|
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||||
|
|
||||||
if (!skip_lf)
|
/* don't apply across umv border */
|
||||||
cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
|
if (mb_row > 0)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||||
|
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||||
|
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (mb_col > 0)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||||
|
|
||||||
|
/* don't apply across umv border */
|
||||||
|
if (mb_row > 0)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
y_ptr += 16;
|
y_ptr += 16;
|
||||||
u_ptr += 8;
|
u_ptr += 8;
|
||||||
v_ptr += 8;
|
v_ptr += 8;
|
||||||
|
|
||||||
mbd->mode_info_context++; /* step to next MB */
|
mode_info_context++; /* step to next MB */
|
||||||
}
|
}
|
||||||
|
|
||||||
y_ptr += post->y_stride * 16 - post->y_width;
|
y_ptr += post->y_stride * 16 - post->y_width;
|
||||||
u_ptr += post->uv_stride * 8 - post->uv_width;
|
u_ptr += post->uv_stride * 8 - post->uv_width;
|
||||||
v_ptr += post->uv_stride * 8 - post->uv_width;
|
v_ptr += post->uv_stride * 8 - post->uv_width;
|
||||||
|
|
||||||
mbd->mode_info_context++; /* Skip border mb */
|
mode_info_context++; /* Skip border mb */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_frame_yonly
|
void vp8_loop_filter_frame_yonly
|
||||||
(
|
(
|
||||||
VP8_COMMON *cm,
|
VP8_COMMON *cm,
|
||||||
@ -399,49 +399,28 @@ void vp8_loop_filter_frame_yonly
|
|||||||
{
|
{
|
||||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||||
|
|
||||||
int i;
|
|
||||||
unsigned char *y_ptr;
|
unsigned char *y_ptr;
|
||||||
int mb_row;
|
int mb_row;
|
||||||
int mb_col;
|
int mb_col;
|
||||||
|
|
||||||
loop_filter_info *lfi = cm->lf_info;
|
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
loop_filter_info lfi;
|
||||||
|
|
||||||
int filter_level;
|
int filter_level;
|
||||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
|
||||||
FRAME_TYPE frame_type = cm->frame_type;
|
FRAME_TYPE frame_type = cm->frame_type;
|
||||||
|
|
||||||
(void) sharpness_lvl;
|
/* Point at base of Mb MODE_INFO list */
|
||||||
|
const MODE_INFO *mode_info_context = cm->mi;
|
||||||
|
|
||||||
/*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
|
sharpness_lvl = cm->sharpness_level;
|
||||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
|
||||||
|
|
||||||
/* Note the baseline filter values for each segment */
|
#if 0
|
||||||
if (alt_flt_enabled)
|
if(default_filt_lvl == 0) /* no filter applied */
|
||||||
{
|
return;
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
#endif
|
||||||
{
|
|
||||||
/* Abs value */
|
|
||||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
|
||||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
|
||||||
/* Delta Value */
|
|
||||||
else
|
|
||||||
{
|
|
||||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
|
||||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
|
||||||
baseline_filter_level[i] = default_filt_lvl;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize the loop filter for this frame. */
|
/* Initialize the loop filter for this frame. */
|
||||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl, sharpness_lvl);
|
||||||
vp8_init_loop_filter(cm);
|
|
||||||
else if (frame_type != cm->last_frame_type)
|
|
||||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
|
||||||
|
|
||||||
/* Set up the buffer pointers */
|
/* Set up the buffer pointers */
|
||||||
y_ptr = post->y_buffer;
|
y_ptr = post->y_buffer;
|
||||||
@ -451,44 +430,75 @@ void vp8_loop_filter_frame_yonly
|
|||||||
{
|
{
|
||||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||||
{
|
{
|
||||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||||
int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
|
mode_info_context->mbmi.mode != SPLITMV &&
|
||||||
mbd->mode_info_context->mbmi.mode != SPLITMV &&
|
mode_info_context->mbmi.mb_skip_coeff);
|
||||||
mbd->mode_info_context->mbmi.mb_skip_coeff);
|
|
||||||
|
|
||||||
filter_level = baseline_filter_level[Segment];
|
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||||
|
const int seg = mode_info_context->mbmi.segment_id;
|
||||||
|
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||||
|
|
||||||
/* Apply any context driven MB level adjustment */
|
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
|
||||||
|
|
||||||
if (filter_level)
|
if (filter_level)
|
||||||
{
|
{
|
||||||
if (mb_col > 0)
|
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||||
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
{
|
||||||
|
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||||
|
lfi.mblim = lfi_n->mblim[filter_level];
|
||||||
|
lfi.blim = lfi_n->blim[filter_level];
|
||||||
|
lfi.lim = lfi_n->lim[filter_level];
|
||||||
|
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||||
|
|
||||||
if (!skip_lf)
|
if (mb_col > 0)
|
||||||
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||||
|
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||||
|
|
||||||
/* don't apply across umv border */
|
if (!skip_lf)
|
||||||
if (mb_row > 0)
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||||
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||||
|
|
||||||
if (!skip_lf)
|
/* don't apply across umv border */
|
||||||
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
if (mb_row > 0)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||||
|
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||||
|
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (mb_col > 0)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||||
|
|
||||||
|
/* don't apply across umv border */
|
||||||
|
if (mb_row > 0)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
y_ptr += 16;
|
y_ptr += 16;
|
||||||
mbd->mode_info_context ++; /* step to next MB */
|
mode_info_context ++; /* step to next MB */
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
y_ptr += post->y_stride * 16 - post->y_width;
|
y_ptr += post->y_stride * 16 - post->y_width;
|
||||||
mbd->mode_info_context ++; /* Skip border mb */
|
mode_info_context ++; /* Skip border mb */
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_partial_frame
|
void vp8_loop_filter_partial_frame
|
||||||
(
|
(
|
||||||
VP8_COMMON *cm,
|
VP8_COMMON *cm,
|
||||||
@ -500,25 +510,32 @@ void vp8_loop_filter_partial_frame
|
|||||||
{
|
{
|
||||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||||
|
|
||||||
int i;
|
|
||||||
unsigned char *y_ptr;
|
unsigned char *y_ptr;
|
||||||
int mb_row;
|
int mb_row;
|
||||||
int mb_col;
|
int mb_col;
|
||||||
/*int mb_rows = post->y_height >> 4;*/
|
|
||||||
int mb_cols = post->y_width >> 4;
|
int mb_cols = post->y_width >> 4;
|
||||||
|
|
||||||
int linestocopy;
|
int linestocopy, i;
|
||||||
|
|
||||||
|
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||||
|
loop_filter_info lfi;
|
||||||
|
|
||||||
loop_filter_info *lfi = cm->lf_info;
|
|
||||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
|
||||||
int filter_level;
|
int filter_level;
|
||||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||||
FRAME_TYPE frame_type = cm->frame_type;
|
FRAME_TYPE frame_type = cm->frame_type;
|
||||||
|
|
||||||
(void) sharpness_lvl;
|
const MODE_INFO *mode_info_context;
|
||||||
|
|
||||||
/*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
|
int lvl_seg[MAX_MB_SEGMENTS];
|
||||||
mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */
|
|
||||||
|
sharpness_lvl = cm->sharpness_level;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if(default_filt_lvl == 0) /* no filter applied */
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
|
||||||
|
|
||||||
linestocopy = (post->y_height >> (4 + Fraction));
|
linestocopy = (post->y_height >> (4 + Fraction));
|
||||||
|
|
||||||
@ -531,29 +548,24 @@ void vp8_loop_filter_partial_frame
|
|||||||
if (alt_flt_enabled)
|
if (alt_flt_enabled)
|
||||||
{
|
{
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||||
{
|
{ /* Abs value */
|
||||||
/* Abs value */
|
|
||||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
{
|
||||||
|
lvl_seg[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||||
|
}
|
||||||
/* Delta Value */
|
/* Delta Value */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
lvl_seg[i] = default_filt_lvl
|
||||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
+ mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||||
|
lvl_seg[i] = (lvl_seg[i] > 0) ?
|
||||||
|
((lvl_seg[i] > 63) ? 63: lvl_seg[i]) : 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
lvl_seg[0] = default_filt_lvl;
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
|
||||||
baseline_filter_level[i] = default_filt_lvl;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize the loop filter for this frame. */
|
|
||||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
|
||||||
vp8_init_loop_filter(cm);
|
|
||||||
else if (frame_type != cm->last_frame_type)
|
|
||||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
|
||||||
|
|
||||||
/* Set up the buffer pointers */
|
/* Set up the buffer pointers */
|
||||||
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
|
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
|
||||||
@ -563,32 +575,64 @@ void vp8_loop_filter_partial_frame
|
|||||||
{
|
{
|
||||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||||
{
|
{
|
||||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||||
int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
|
mode_info_context->mbmi.mode != SPLITMV &&
|
||||||
mbd->mode_info_context->mbmi.mode != SPLITMV &&
|
mode_info_context->mbmi.mb_skip_coeff);
|
||||||
mbd->mode_info_context->mbmi.mb_skip_coeff);
|
|
||||||
|
|
||||||
filter_level = baseline_filter_level[Segment];
|
if (alt_flt_enabled)
|
||||||
|
filter_level = lvl_seg[mode_info_context->mbmi.segment_id];
|
||||||
|
else
|
||||||
|
filter_level = lvl_seg[0];
|
||||||
|
|
||||||
if (filter_level)
|
if (filter_level)
|
||||||
{
|
{
|
||||||
if (mb_col > 0)
|
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||||
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
{
|
||||||
|
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||||
|
lfi.mblim = lfi_n->mblim[filter_level];
|
||||||
|
lfi.blim = lfi_n->blim[filter_level];
|
||||||
|
lfi.lim = lfi_n->lim[filter_level];
|
||||||
|
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||||
|
|
||||||
if (!skip_lf)
|
if (mb_col > 0)
|
||||||
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||||
|
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||||
|
|
||||||
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||||
|
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||||
|
|
||||||
if (!skip_lf)
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||||
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||||
|
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (mb_col > 0)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||||
|
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||||
|
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
y_ptr += 16;
|
y_ptr += 16;
|
||||||
mbd->mode_info_context += 1; /* step to next MB */
|
mode_info_context += 1; /* step to next MB */
|
||||||
}
|
}
|
||||||
|
|
||||||
y_ptr += post->y_stride * 16 - post->y_width;
|
y_ptr += post->y_stride * 16 - post->y_width;
|
||||||
mbd->mode_info_context += 1; /* Skip border mb */
|
mode_info_context += 1; /* Skip border mb */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#define loopfilter_h
|
#define loopfilter_h
|
||||||
|
|
||||||
#include "vpx_ports/mem.h"
|
#include "vpx_ports/mem.h"
|
||||||
|
#include "vpx_config.h"
|
||||||
|
|
||||||
#define MAX_LOOP_FILTER 63
|
#define MAX_LOOP_FILTER 63
|
||||||
|
|
||||||
@ -22,27 +23,46 @@ typedef enum
|
|||||||
SIMPLE_LOOPFILTER = 1
|
SIMPLE_LOOPFILTER = 1
|
||||||
} LOOPFILTERTYPE;
|
} LOOPFILTERTYPE;
|
||||||
|
|
||||||
/* FRK
|
#if ARCH_ARM
|
||||||
* Need to align this structure so when it is declared and
|
#define SIMD_WIDTH 1
|
||||||
|
#else
|
||||||
|
#define SIMD_WIDTH 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Need to align this structure so when it is declared and
|
||||||
* passed it can be loaded into vector registers.
|
* passed it can be loaded into vector registers.
|
||||||
*/
|
*/
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
DECLARE_ALIGNED(16, signed char, lim[16]);
|
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||||
DECLARE_ALIGNED(16, signed char, flim[16]);
|
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||||
DECLARE_ALIGNED(16, signed char, thr[16]);
|
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||||
DECLARE_ALIGNED(16, signed char, mbflim[16]);
|
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
|
||||||
|
unsigned char lvl[4][4][4];
|
||||||
|
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
|
||||||
|
unsigned char mode_lf_lut[10];
|
||||||
|
} loop_filter_info_n;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
const unsigned char * mblim;
|
||||||
|
const unsigned char * blim;
|
||||||
|
const unsigned char * lim;
|
||||||
|
const unsigned char * hev_thr;
|
||||||
} loop_filter_info;
|
} loop_filter_info;
|
||||||
|
|
||||||
|
|
||||||
#define prototype_loopfilter(sym) \
|
#define prototype_loopfilter(sym) \
|
||||||
void sym(unsigned char *src, int pitch, const signed char *flimit,\
|
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||||
const signed char *limit, const signed char *thresh, int count)
|
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||||
|
|
||||||
#define prototype_loopfilter_block(sym) \
|
#define prototype_loopfilter_block(sym) \
|
||||||
void sym(unsigned char *y, unsigned char *u, unsigned char *v,\
|
void sym(unsigned char *y, unsigned char *u, unsigned char *v, \
|
||||||
int ystride, int uv_stride, loop_filter_info *lfi)
|
int ystride, int uv_stride, loop_filter_info *lfi)
|
||||||
|
|
||||||
|
#define prototype_simple_loopfilter(sym) \
|
||||||
|
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
||||||
|
|
||||||
#if ARCH_X86 || ARCH_X86_64
|
#if ARCH_X86 || ARCH_X86_64
|
||||||
#include "x86/loopfilter_x86.h"
|
#include "x86/loopfilter_x86.h"
|
||||||
#endif
|
#endif
|
||||||
@ -71,38 +91,39 @@ extern prototype_loopfilter_block(vp8_lf_normal_mb_h);
|
|||||||
#endif
|
#endif
|
||||||
extern prototype_loopfilter_block(vp8_lf_normal_b_h);
|
extern prototype_loopfilter_block(vp8_lf_normal_b_h);
|
||||||
|
|
||||||
|
|
||||||
#ifndef vp8_lf_simple_mb_v
|
#ifndef vp8_lf_simple_mb_v
|
||||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_c
|
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_c
|
||||||
#endif
|
#endif
|
||||||
extern prototype_loopfilter_block(vp8_lf_simple_mb_v);
|
extern prototype_simple_loopfilter(vp8_lf_simple_mb_v);
|
||||||
|
|
||||||
#ifndef vp8_lf_simple_b_v
|
#ifndef vp8_lf_simple_b_v
|
||||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_c
|
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_c
|
||||||
#endif
|
#endif
|
||||||
extern prototype_loopfilter_block(vp8_lf_simple_b_v);
|
extern prototype_simple_loopfilter(vp8_lf_simple_b_v);
|
||||||
|
|
||||||
#ifndef vp8_lf_simple_mb_h
|
#ifndef vp8_lf_simple_mb_h
|
||||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_c
|
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_c
|
||||||
#endif
|
#endif
|
||||||
extern prototype_loopfilter_block(vp8_lf_simple_mb_h);
|
extern prototype_simple_loopfilter(vp8_lf_simple_mb_h);
|
||||||
|
|
||||||
#ifndef vp8_lf_simple_b_h
|
#ifndef vp8_lf_simple_b_h
|
||||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_c
|
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_c
|
||||||
#endif
|
#endif
|
||||||
extern prototype_loopfilter_block(vp8_lf_simple_b_h);
|
extern prototype_simple_loopfilter(vp8_lf_simple_b_h);
|
||||||
|
|
||||||
typedef prototype_loopfilter_block((*vp8_lf_block_fn_t));
|
typedef prototype_loopfilter_block((*vp8_lf_block_fn_t));
|
||||||
|
typedef prototype_simple_loopfilter((*vp8_slf_block_fn_t));
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
vp8_lf_block_fn_t normal_mb_v;
|
vp8_lf_block_fn_t normal_mb_v;
|
||||||
vp8_lf_block_fn_t normal_b_v;
|
vp8_lf_block_fn_t normal_b_v;
|
||||||
vp8_lf_block_fn_t normal_mb_h;
|
vp8_lf_block_fn_t normal_mb_h;
|
||||||
vp8_lf_block_fn_t normal_b_h;
|
vp8_lf_block_fn_t normal_b_h;
|
||||||
vp8_lf_block_fn_t simple_mb_v;
|
vp8_slf_block_fn_t simple_mb_v;
|
||||||
vp8_lf_block_fn_t simple_b_v;
|
vp8_slf_block_fn_t simple_b_v;
|
||||||
vp8_lf_block_fn_t simple_mb_h;
|
vp8_slf_block_fn_t simple_mb_h;
|
||||||
vp8_lf_block_fn_t simple_b_h;
|
vp8_slf_block_fn_t simple_b_h;
|
||||||
} vp8_loopfilter_rtcd_vtable_t;
|
} vp8_loopfilter_rtcd_vtable_t;
|
||||||
|
|
||||||
#if CONFIG_RUNTIME_CPU_DETECT
|
#if CONFIG_RUNTIME_CPU_DETECT
|
||||||
@ -115,9 +136,9 @@ typedef void loop_filter_uvfunction
|
|||||||
(
|
(
|
||||||
unsigned char *u, /* source pointer */
|
unsigned char *u, /* source pointer */
|
||||||
int p, /* pitch */
|
int p, /* pitch */
|
||||||
const signed char *flimit,
|
const unsigned char *blimit,
|
||||||
const signed char *limit,
|
const unsigned char *limit,
|
||||||
const signed char *thresh,
|
const unsigned char *thresh,
|
||||||
unsigned char *v
|
unsigned char *v
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -24,8 +24,9 @@ static __inline signed char vp8_signed_char_clamp(int t)
|
|||||||
|
|
||||||
|
|
||||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||||
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
|
static __inline signed char vp8_filter_mask(uc limit, uc blimit,
|
||||||
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
|
uc p3, uc p2, uc p1, uc p0,
|
||||||
|
uc q0, uc q1, uc q2, uc q3)
|
||||||
{
|
{
|
||||||
signed char mask = 0;
|
signed char mask = 0;
|
||||||
mask |= (abs(p3 - p2) > limit) * -1;
|
mask |= (abs(p3 - p2) > limit) * -1;
|
||||||
@ -34,13 +35,13 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
|
|||||||
mask |= (abs(q1 - q0) > limit) * -1;
|
mask |= (abs(q1 - q0) > limit) * -1;
|
||||||
mask |= (abs(q2 - q1) > limit) * -1;
|
mask |= (abs(q2 - q1) > limit) * -1;
|
||||||
mask |= (abs(q3 - q2) > limit) * -1;
|
mask |= (abs(q3 - q2) > limit) * -1;
|
||||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;
|
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
|
||||||
mask = ~mask;
|
mask = ~mask;
|
||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||||
static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
|
static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
|
||||||
{
|
{
|
||||||
signed char hev = 0;
|
signed char hev = 0;
|
||||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||||
@ -48,7 +49,8 @@ static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0,
|
|||||||
return hev;
|
return hev;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
static __inline void vp8_filter(signed char mask, uc hev, uc *op1,
|
||||||
|
uc *op0, uc *oq0, uc *oq1)
|
||||||
|
|
||||||
{
|
{
|
||||||
signed char ps0, qs0;
|
signed char ps0, qs0;
|
||||||
@ -98,9 +100,9 @@ void vp8_loop_filter_horizontal_edge_c
|
|||||||
(
|
(
|
||||||
unsigned char *s,
|
unsigned char *s,
|
||||||
int p, /* pitch */
|
int p, /* pitch */
|
||||||
const signed char *flimit,
|
const unsigned char *blimit,
|
||||||
const signed char *limit,
|
const unsigned char *limit,
|
||||||
const signed char *thresh,
|
const unsigned char *thresh,
|
||||||
int count
|
int count
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
@ -113,11 +115,11 @@ void vp8_loop_filter_horizontal_edge_c
|
|||||||
*/
|
*/
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||||
|
|
||||||
hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||||
|
|
||||||
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||||
|
|
||||||
@ -130,9 +132,9 @@ void vp8_loop_filter_vertical_edge_c
|
|||||||
(
|
(
|
||||||
unsigned char *s,
|
unsigned char *s,
|
||||||
int p,
|
int p,
|
||||||
const signed char *flimit,
|
const unsigned char *blimit,
|
||||||
const signed char *limit,
|
const unsigned char *limit,
|
||||||
const signed char *thresh,
|
const unsigned char *thresh,
|
||||||
int count
|
int count
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
@ -145,10 +147,10 @@ void vp8_loop_filter_vertical_edge_c
|
|||||||
*/
|
*/
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||||
|
|
||||||
hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
|
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||||
|
|
||||||
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
|
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
|
||||||
|
|
||||||
@ -157,7 +159,7 @@ void vp8_loop_filter_vertical_edge_c
|
|||||||
while (++i < count * 8);
|
while (++i < count * 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline void vp8_mbfilter(signed char mask, signed char hev,
|
static __inline void vp8_mbfilter(signed char mask, uc hev,
|
||||||
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
||||||
{
|
{
|
||||||
signed char s, u;
|
signed char s, u;
|
||||||
@ -216,9 +218,9 @@ void vp8_mbloop_filter_horizontal_edge_c
|
|||||||
(
|
(
|
||||||
unsigned char *s,
|
unsigned char *s,
|
||||||
int p,
|
int p,
|
||||||
const signed char *flimit,
|
const unsigned char *blimit,
|
||||||
const signed char *limit,
|
const unsigned char *limit,
|
||||||
const signed char *thresh,
|
const unsigned char *thresh,
|
||||||
int count
|
int count
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
@ -232,11 +234,11 @@ void vp8_mbloop_filter_horizontal_edge_c
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
|
||||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||||
|
|
||||||
hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||||
|
|
||||||
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
|
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
|
||||||
|
|
||||||
@ -251,9 +253,9 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||||||
(
|
(
|
||||||
unsigned char *s,
|
unsigned char *s,
|
||||||
int p,
|
int p,
|
||||||
const signed char *flimit,
|
const unsigned char *blimit,
|
||||||
const signed char *limit,
|
const unsigned char *limit,
|
||||||
const signed char *thresh,
|
const unsigned char *thresh,
|
||||||
int count
|
int count
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
@ -264,10 +266,10 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
|
||||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||||
|
|
||||||
hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
|
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||||
|
|
||||||
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
||||||
|
|
||||||
@ -278,13 +280,13 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||||
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
|
static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
|
||||||
{
|
{
|
||||||
/* Why does this cause problems for win32?
|
/* Why does this cause problems for win32?
|
||||||
* error C2143: syntax error : missing ';' before 'type'
|
* error C2143: syntax error : missing ';' before 'type'
|
||||||
* (void) limit;
|
* (void) limit;
|
||||||
*/
|
*/
|
||||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
|
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
|
||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -317,47 +319,37 @@ void vp8_loop_filter_simple_horizontal_edge_c
|
|||||||
(
|
(
|
||||||
unsigned char *s,
|
unsigned char *s,
|
||||||
int p,
|
int p,
|
||||||
const signed char *flimit,
|
const unsigned char *blimit
|
||||||
const signed char *limit,
|
|
||||||
const signed char *thresh,
|
|
||||||
int count
|
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
signed char mask = 0;
|
signed char mask = 0;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
(void) thresh;
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
|
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||||
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
|
||||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||||
++s;
|
++s;
|
||||||
}
|
}
|
||||||
while (++i < count * 8);
|
while (++i < 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp8_loop_filter_simple_vertical_edge_c
|
void vp8_loop_filter_simple_vertical_edge_c
|
||||||
(
|
(
|
||||||
unsigned char *s,
|
unsigned char *s,
|
||||||
int p,
|
int p,
|
||||||
const signed char *flimit,
|
const unsigned char *blimit
|
||||||
const signed char *limit,
|
|
||||||
const signed char *thresh,
|
|
||||||
int count
|
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
signed char mask = 0;
|
signed char mask = 0;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
(void) thresh;
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
|
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
|
||||||
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
|
|
||||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||||
s += p;
|
s += p;
|
||||||
}
|
}
|
||||||
while (++i < count * 8);
|
while (++i < 16);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -83,6 +83,7 @@ typedef struct VP8_COMMON_RTCD
|
|||||||
} VP8_COMMON_RTCD;
|
} VP8_COMMON_RTCD;
|
||||||
|
|
||||||
typedef struct VP8Common
|
typedef struct VP8Common
|
||||||
|
|
||||||
{
|
{
|
||||||
struct vpx_internal_error_info error;
|
struct vpx_internal_error_info error;
|
||||||
|
|
||||||
@ -107,7 +108,8 @@ typedef struct VP8Common
|
|||||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||||
|
|
||||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for loopfilter init checking and motion search. */
|
|
||||||
|
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
|
||||||
FRAME_TYPE frame_type;
|
FRAME_TYPE frame_type;
|
||||||
|
|
||||||
int show_frame;
|
int show_frame;
|
||||||
@ -149,11 +151,9 @@ typedef struct VP8Common
|
|||||||
INTERPOLATIONFILTERTYPE mcomp_filter_type;
|
INTERPOLATIONFILTERTYPE mcomp_filter_type;
|
||||||
LOOPFILTERTYPE last_filter_type;
|
LOOPFILTERTYPE last_filter_type;
|
||||||
LOOPFILTERTYPE filter_type;
|
LOOPFILTERTYPE filter_type;
|
||||||
loop_filter_info lf_info[MAX_LOOP_FILTER+1];
|
|
||||||
prototype_loopfilter_block((*lf_mbv));
|
loop_filter_info_n lf_info;
|
||||||
prototype_loopfilter_block((*lf_mbh));
|
|
||||||
prototype_loopfilter_block((*lf_bv));
|
|
||||||
prototype_loopfilter_block((*lf_bh));
|
|
||||||
int filter_level;
|
int filter_level;
|
||||||
int last_sharpness_level;
|
int last_sharpness_level;
|
||||||
int sharpness_level;
|
int sharpness_level;
|
||||||
@ -206,10 +206,9 @@ typedef struct VP8Common
|
|||||||
struct postproc_state postproc_state;
|
struct postproc_state postproc_state;
|
||||||
} VP8_COMMON;
|
} VP8_COMMON;
|
||||||
|
|
||||||
|
void vp8_loop_filter_init(VP8_COMMON *cm);
|
||||||
int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level);
|
void vp8_loop_filter_frame_init(VP8_COMMON *cm, MACROBLOCKD *mbd,
|
||||||
void vp8_init_loop_filter(VP8_COMMON *cm);
|
int default_filt_lvl, int sharpness_lvl);
|
||||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
|
void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
|
||||||
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -122,12 +122,10 @@ next8_h:
|
|||||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||||
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
mov rdx, arg(2) ;blimit ; get blimit
|
||||||
movq mm2, [rdx] ; flimit mm2
|
movq mm7, [rdx] ; blimit
|
||||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
|
||||||
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
por mm1, mm5
|
por mm1, mm5
|
||||||
pxor mm5, mm5
|
pxor mm5, mm5
|
||||||
pcmpeqb mm1, mm5 ; mask mm1
|
pcmpeqb mm1, mm5 ; mask mm1
|
||||||
@ -230,7 +228,7 @@ next8_h:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -406,9 +404,9 @@ next8_v:
|
|||||||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ;
|
mov rdx, arg(2) ;blimit ;
|
||||||
|
|
||||||
movq mm2, [rdx] ;flimit mm2
|
movq mm4, [rdx] ;blimit
|
||||||
movq mm1, mm3 ; mm1=mm3=p0
|
movq mm1, mm3 ; mm1=mm3=p0
|
||||||
|
|
||||||
movq mm7, mm6 ; mm7=mm6=q0
|
movq mm7, mm6 ; mm7=mm6=q0
|
||||||
@ -419,10 +417,7 @@ next8_v:
|
|||||||
paddusb mm1, mm1 ; abs(q0-p0)*2
|
paddusb mm1, mm1 ; abs(q0-p0)*2
|
||||||
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
|
||||||
por mm1, mm0; ; mask
|
por mm1, mm0; ; mask
|
||||||
|
|
||||||
pxor mm0, mm0
|
pxor mm0, mm0
|
||||||
@ -603,7 +598,7 @@ next8_v:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -719,17 +714,15 @@ next8_mbh:
|
|||||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||||
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
mov rdx, arg(2) ;blimit ; get blimit
|
||||||
movq mm2, [rdx] ; flimit mm2
|
movq mm7, [rdx] ; blimit
|
||||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
|
||||||
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
por mm1, mm5
|
por mm1, mm5
|
||||||
pxor mm5, mm5
|
pxor mm5, mm5
|
||||||
pcmpeqb mm1, mm5 ; mask mm1
|
pcmpeqb mm1, mm5 ; mask mm1
|
||||||
|
|
||||||
; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
|
; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
|
||||||
; mm6 = p0,
|
; mm6 = p0,
|
||||||
|
|
||||||
; calculate high edge variance
|
; calculate high edge variance
|
||||||
@ -922,7 +915,7 @@ next8_mbh:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -1108,9 +1101,9 @@ next8_mbv:
|
|||||||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ;
|
mov rdx, arg(2) ;blimit ;
|
||||||
|
|
||||||
movq mm2, [rdx] ;flimit mm2
|
movq mm4, [rdx] ;blimit
|
||||||
movq mm1, mm3 ; mm1=mm3=p0
|
movq mm1, mm3 ; mm1=mm3=p0
|
||||||
|
|
||||||
movq mm7, mm6 ; mm7=mm6=q0
|
movq mm7, mm6 ; mm7=mm6=q0
|
||||||
@ -1121,10 +1114,7 @@ next8_mbv:
|
|||||||
paddusb mm1, mm1 ; abs(q0-p0)*2
|
paddusb mm1, mm1 ; abs(q0-p0)*2
|
||||||
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
|
||||||
por mm1, mm0; ; mask
|
por mm1, mm0; ; mask
|
||||||
|
|
||||||
pxor mm0, mm0
|
pxor mm0, mm0
|
||||||
@ -1392,16 +1382,13 @@ next8_mbv:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit
|
||||||
; const char *limit,
|
|
||||||
; const char *thresh,
|
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
|
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
|
||||||
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 3
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
|||||||
mov rsi, arg(0) ;src_ptr
|
mov rsi, arg(0) ;src_ptr
|
||||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||||
|
|
||||||
movsxd rcx, dword ptr arg(5) ;count
|
mov rcx, 2 ; count
|
||||||
nexts8_h:
|
nexts8_h:
|
||||||
mov rdx, arg(3) ;limit
|
mov rdx, arg(2) ;blimit ; get blimit
|
||||||
movq mm7, [rdx]
|
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
|
||||||
movq mm3, [rdx] ;
|
movq mm3, [rdx] ;
|
||||||
paddb mm3, mm3 ; flimit*2 (less than 255)
|
|
||||||
paddb mm3, mm7 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||||
add rdi, rax
|
add rdi, rax
|
||||||
@ -1445,7 +1428,7 @@ nexts8_h:
|
|||||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||||
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
pxor mm3, mm3
|
pxor mm3, mm3
|
||||||
pcmpeqb mm5, mm3
|
pcmpeqb mm5, mm3
|
||||||
|
|
||||||
@ -1515,16 +1498,13 @@ nexts8_h:
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit
|
||||||
; const char *limit,
|
|
||||||
; const char *thresh,
|
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
|
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
|
||||||
sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 3
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
push rdi
|
push rdi
|
||||||
@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
|||||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||||
|
|
||||||
lea rsi, [rsi + rax*4- 2]; ;
|
lea rsi, [rsi + rax*4- 2]; ;
|
||||||
movsxd rcx, dword ptr arg(5) ;count
|
mov rcx, 2 ; count
|
||||||
nexts8_v:
|
nexts8_v:
|
||||||
|
|
||||||
lea rdi, [rsi + rax];
|
lea rdi, [rsi + rax];
|
||||||
@ -1602,14 +1582,10 @@ nexts8_v:
|
|||||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||||
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
mov rdx, arg(2) ;blimit ; get blimit
|
||||||
movq mm7, [rdx]
|
movq mm7, [rdx]
|
||||||
mov rdx, arg(3) ; get limit
|
|
||||||
movq mm6, [rdx]
|
|
||||||
paddb mm7, mm7 ; flimit*2 (less than 255)
|
|
||||||
paddb mm7, mm6 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
pxor mm7, mm7
|
pxor mm7, mm7
|
||||||
pcmpeqb mm5, mm7 ; mm5 = mask
|
pcmpeqb mm5, mm7 ; mm5 = mask
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@
|
|||||||
psubusb xmm6, xmm5 ; p1-=p0
|
psubusb xmm6, xmm5 ; p1-=p0
|
||||||
|
|
||||||
por xmm6, xmm4 ; abs(p1 - p0)
|
por xmm6, xmm4 ; abs(p1 - p0)
|
||||||
mov rdx, arg(2) ; get flimit
|
mov rdx, arg(2) ; get blimit
|
||||||
|
|
||||||
movdqa t1, xmm6 ; save to t1
|
movdqa t1, xmm6 ; save to t1
|
||||||
|
|
||||||
@ -123,7 +123,7 @@
|
|||||||
psubusb xmm1, xmm7
|
psubusb xmm1, xmm7
|
||||||
por xmm2, xmm3 ; abs(p1-q1)
|
por xmm2, xmm3 ; abs(p1-q1)
|
||||||
|
|
||||||
movdqa xmm4, XMMWORD PTR [rdx] ; flimit
|
movdqa xmm7, XMMWORD PTR [rdx] ; blimit
|
||||||
|
|
||||||
movdqa xmm3, xmm0 ; q0
|
movdqa xmm3, xmm0 ; q0
|
||||||
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||||
@ -134,13 +134,11 @@
|
|||||||
psrlw xmm2, 1 ; abs(p1-q1)/2
|
psrlw xmm2, 1 ; abs(p1-q1)/2
|
||||||
|
|
||||||
psubusb xmm5, xmm3 ; p0-=q0
|
psubusb xmm5, xmm3 ; p0-=q0
|
||||||
paddb xmm4, xmm4 ; flimit*2 (less than 255)
|
|
||||||
|
|
||||||
psubusb xmm3, xmm6 ; q0-=p0
|
psubusb xmm3, xmm6 ; q0-=p0
|
||||||
por xmm5, xmm3 ; abs(p0 - q0)
|
por xmm5, xmm3 ; abs(p0 - q0)
|
||||||
|
|
||||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||||
paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
movdqa xmm4, t0 ; hev get abs (q1 - q0)
|
movdqa xmm4, t0 ; hev get abs (q1 - q0)
|
||||||
|
|
||||||
@ -150,7 +148,7 @@
|
|||||||
|
|
||||||
movdqa xmm2, XMMWORD PTR [rdx] ; hev
|
movdqa xmm2, XMMWORD PTR [rdx] ; hev
|
||||||
|
|
||||||
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
psubusb xmm4, xmm2 ; hev
|
psubusb xmm4, xmm2 ; hev
|
||||||
|
|
||||||
psubusb xmm3, xmm2 ; hev
|
psubusb xmm3, xmm2 ; hev
|
||||||
@ -278,7 +276,7 @@
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *u,
|
; unsigned char *u,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; unsigned char *v
|
; unsigned char *v
|
||||||
@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
movdqa xmm4, XMMWORD PTR [rdx]; limit
|
movdqa xmm4, XMMWORD PTR [rdx]; limit
|
||||||
|
|
||||||
pmaxub xmm0, xmm7
|
pmaxub xmm0, xmm7
|
||||||
mov rdx, arg(2) ; flimit
|
mov rdx, arg(2) ; blimit
|
||||||
|
|
||||||
psubusb xmm0, xmm4
|
psubusb xmm0, xmm4
|
||||||
movdqa xmm5, xmm2 ; q1
|
movdqa xmm5, xmm2 ; q1
|
||||||
@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
psrlw xmm5, 1 ; abs(p1-q1)/2
|
psrlw xmm5, 1 ; abs(p1-q1)/2
|
||||||
psubusb xmm6, xmm3 ; q0-p0
|
psubusb xmm6, xmm3 ; q0-p0
|
||||||
|
|
||||||
movdqa xmm2, XMMWORD PTR [rdx]; flimit
|
movdqa xmm4, XMMWORD PTR [rdx]; blimit
|
||||||
|
|
||||||
mov rdx, arg(4) ; get thresh
|
mov rdx, arg(4) ; get thresh
|
||||||
|
|
||||||
por xmm1, xmm6 ; abs(q0-p0)
|
por xmm1, xmm6 ; abs(q0-p0)
|
||||||
paddb xmm2, xmm2 ; flimit*2 (less than 255)
|
|
||||||
|
|
||||||
movdqa xmm6, t0 ; get abs (q1 - q0)
|
movdqa xmm6, t0 ; get abs (q1 - q0)
|
||||||
|
|
||||||
@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
|
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
|
||||||
|
|
||||||
paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
|
|
||||||
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
|
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
|
||||||
|
|
||||||
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
|
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
|
||||||
|
|
||||||
por xmm1, xmm0 ; mask
|
por xmm1, xmm0 ; mask
|
||||||
@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *u,
|
; unsigned char *u,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; unsigned char *v
|
; unsigned char *v
|
||||||
@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; int count
|
; int count
|
||||||
@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *u,
|
; unsigned char *u,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
; const char *limit,
|
||||||
; const char *thresh,
|
; const char *thresh,
|
||||||
; unsigned char *v
|
; unsigned char *v
|
||||||
@ -1376,16 +1372,13 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
|
||||||
; const char *thresh,
|
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
|
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
|
||||||
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 3
|
||||||
SAVE_XMM 7
|
SAVE_XMM 7
|
||||||
GET_GOT rbx
|
GET_GOT rbx
|
||||||
push rsi
|
push rsi
|
||||||
@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||||||
|
|
||||||
mov rsi, arg(0) ;src_ptr
|
mov rsi, arg(0) ;src_ptr
|
||||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||||
mov rdx, arg(2) ;flimit ; get flimit
|
mov rdx, arg(2) ;blimit
|
||||||
movdqa xmm3, XMMWORD PTR [rdx]
|
movdqa xmm3, XMMWORD PTR [rdx]
|
||||||
mov rdx, arg(3) ;limit
|
|
||||||
movdqa xmm7, XMMWORD PTR [rdx]
|
|
||||||
|
|
||||||
paddb xmm3, xmm3 ; flimit*2 (less than 255)
|
|
||||||
paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||||
add rdi, rax
|
add rdi, rax
|
||||||
@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||||
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
pxor xmm3, xmm3
|
pxor xmm3, xmm3
|
||||||
pcmpeqb xmm5, xmm3
|
pcmpeqb xmm5, xmm3
|
||||||
|
|
||||||
@ -1493,16 +1481,13 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||||||
;(
|
;(
|
||||||
; unsigned char *src_ptr,
|
; unsigned char *src_ptr,
|
||||||
; int src_pixel_step,
|
; int src_pixel_step,
|
||||||
; const char *flimit,
|
; const char *blimit,
|
||||||
; const char *limit,
|
|
||||||
; const char *thresh,
|
|
||||||
; int count
|
|
||||||
;)
|
;)
|
||||||
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
|
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
|
||||||
sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
||||||
push rbp ; save old base pointer value.
|
push rbp ; save old base pointer value.
|
||||||
mov rbp, rsp ; set new base pointer value.
|
mov rbp, rsp ; set new base pointer value.
|
||||||
SHADOW_ARGS_TO_STACK 6
|
SHADOW_ARGS_TO_STACK 3
|
||||||
SAVE_XMM 7
|
SAVE_XMM 7
|
||||||
GET_GOT rbx ; save callee-saved reg
|
GET_GOT rbx ; save callee-saved reg
|
||||||
push rsi
|
push rsi
|
||||||
@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
|||||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||||
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||||
|
|
||||||
mov rdx, arg(2) ;flimit
|
mov rdx, arg(2) ;blimit
|
||||||
movdqa xmm7, XMMWORD PTR [rdx]
|
movdqa xmm7, XMMWORD PTR [rdx]
|
||||||
mov rdx, arg(3) ; get limit
|
|
||||||
movdqa xmm6, XMMWORD PTR [rdx]
|
|
||||||
paddb xmm7, xmm7 ; flimit*2 (less than 255)
|
|
||||||
paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255)
|
|
||||||
|
|
||||||
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
pcmpeqb xmm5, xmm7 ; mm5 = mask
|
pcmpeqb xmm5, xmm7 ; mm5 = mask
|
||||||
|
|
||||||
|
@ -9,30 +9,18 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "vpx_ports/config.h"
|
#include "vpx_config.h"
|
||||||
#include "vp8/common/loopfilter.h"
|
#include "vp8/common/loopfilter.h"
|
||||||
|
|
||||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
|
|
||||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
|
|
||||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
|
||||||
|
|
||||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
|
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
|
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
|
||||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
|
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
|
||||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
|
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
|
||||||
|
|
||||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
|
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
|
||||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
|
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
|
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
|
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
|
||||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
|
||||||
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
|
|
||||||
|
|
||||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
||||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
||||||
@ -44,23 +32,13 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
|
|||||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -68,23 +46,13 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -92,27 +60,23 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -120,27 +84,23 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||||||
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
|
|
||||||
if (v_ptr)
|
if (v_ptr)
|
||||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -150,20 +110,10 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||||||
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -171,20 +121,10 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
|||||||
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
(void) u_ptr;
|
|
||||||
(void) v_ptr;
|
|
||||||
(void) uv_stride;
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -192,24 +132,20 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
|||||||
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
|
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -217,36 +153,20 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
|
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
|
||||||
{
|
{
|
||||||
(void) u_ptr;
|
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
|
||||||
(void) v_ptr;
|
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
|
||||||
(void) uv_stride;
|
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
|
||||||
void vp8_fast_loop_filter_vertical_edges_sse(unsigned char *y_ptr,
|
|
||||||
int y_stride,
|
|
||||||
loop_filter_info *lfi)
|
|
||||||
{
|
|
||||||
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
@ -24,10 +24,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_mmx);
|
|||||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
|
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
|
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
|
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_mmx);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_mmx);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_mmx);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_mmx);
|
||||||
|
|
||||||
|
|
||||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||||
@ -44,13 +44,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
|
|||||||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
|
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_v
|
#undef vp8_lf_simple_mb_v
|
||||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_mmx
|
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_mmx
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_v
|
#undef vp8_lf_simple_b_v
|
||||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
|
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_h
|
#undef vp8_lf_simple_mb_h
|
||||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_mmx
|
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_mmx
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_h
|
#undef vp8_lf_simple_b_h
|
||||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
|
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
|
||||||
@ -63,10 +63,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_sse2);
|
|||||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
|
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
|
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
|
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_sse2);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_sse2);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_sse2);
|
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
||||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
|
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_sse2);
|
||||||
|
|
||||||
|
|
||||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||||
@ -83,13 +83,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
|
|||||||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
|
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_v
|
#undef vp8_lf_simple_mb_v
|
||||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_sse2
|
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_sse2
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_v
|
#undef vp8_lf_simple_b_v
|
||||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
|
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
|
||||||
|
|
||||||
#undef vp8_lf_simple_mb_h
|
#undef vp8_lf_simple_mb_h
|
||||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_sse2
|
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_sse2
|
||||||
|
|
||||||
#undef vp8_lf_simple_b_h
|
#undef vp8_lf_simple_b_h
|
||||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2
|
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "vpx_ports/config.h"
|
#include "vpx_config.h"
|
||||||
#include "vpx_ports/x86.h"
|
#include "vpx_ports/x86.h"
|
||||||
#include "vp8/common/g_common.h"
|
#include "vp8/common/g_common.h"
|
||||||
#include "vp8/common/subpixel.h"
|
#include "vp8/common/subpixel.h"
|
||||||
@ -63,9 +63,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
|
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
|
||||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
|
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
|
||||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
|
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
|
||||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx;
|
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx;
|
||||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
|
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
|
||||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx;
|
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx;
|
||||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
|
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
|
||||||
|
|
||||||
#if CONFIG_POSTPROC
|
#if CONFIG_POSTPROC
|
||||||
@ -101,9 +101,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
|
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
|
||||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
|
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
|
||||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
|
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
|
||||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2;
|
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2;
|
||||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
|
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
|
||||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2;
|
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2;
|
||||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
|
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
|
||||||
|
|
||||||
#if CONFIG_POSTPROC
|
#if CONFIG_POSTPROC
|
||||||
|
@ -180,11 +180,11 @@ static MB_PREDICTION_MODE read_mv_ref(vp8_reader *bc, const vp8_prob *p)
|
|||||||
return (MB_PREDICTION_MODE)i;
|
return (MB_PREDICTION_MODE)i;
|
||||||
}
|
}
|
||||||
|
|
||||||
static MB_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
|
static B_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
|
||||||
{
|
{
|
||||||
const int i = vp8_treed_read(bc, vp8_sub_mv_ref_tree, p);
|
const int i = vp8_treed_read(bc, vp8_sub_mv_ref_tree, p);
|
||||||
|
|
||||||
return (MB_PREDICTION_MODE)i;
|
return (B_PREDICTION_MODE)i;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef VPX_MODE_COUNT
|
#ifdef VPX_MODE_COUNT
|
||||||
@ -334,7 +334,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
|
|||||||
abovemv.as_int = above_block_mv(mi, k, mis);
|
abovemv.as_int = above_block_mv(mi, k, mis);
|
||||||
mv_contz = vp8_mv_cont(&leftmv, &abovemv);
|
mv_contz = vp8_mv_cont(&leftmv, &abovemv);
|
||||||
|
|
||||||
switch ((B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
|
switch (sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
|
||||||
{
|
{
|
||||||
case NEW4X4:
|
case NEW4X4:
|
||||||
read_mv(bc, &blockmv.as_mv, (const MV_CONTEXT *) mvc);
|
read_mv(bc, &blockmv.as_mv, (const MV_CONTEXT *) mvc);
|
||||||
|
@ -95,7 +95,7 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
|
|||||||
{
|
{
|
||||||
VP8_COMMON *cm = &pbi->common;
|
VP8_COMMON *cm = &pbi->common;
|
||||||
|
|
||||||
vp8_init_loop_filter(cm);
|
vp8_loop_filter_init(cm);
|
||||||
cm->last_frame_type = KEY_FRAME;
|
cm->last_frame_type = KEY_FRAME;
|
||||||
cm->last_filter_type = cm->filter_type;
|
cm->last_filter_type = cm->filter_type;
|
||||||
cm->last_sharpness_level = cm->sharpness_level;
|
cm->last_sharpness_level = cm->sharpness_level;
|
||||||
|
@ -274,9 +274,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
|
|||||||
int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
|
int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
|
||||||
|
|
||||||
int filter_level;
|
int filter_level;
|
||||||
loop_filter_info *lfi = pc->lf_info;
|
loop_filter_info_n *lfi_n = &pc->lf_info;
|
||||||
int alt_flt_enabled = xd->segmentation_enabled;
|
|
||||||
int Segment;
|
|
||||||
|
|
||||||
pbi->mb_row_di[ithread].mb_row = mb_row;
|
pbi->mb_row_di[ithread].mb_row = mb_row;
|
||||||
pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part];
|
pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part];
|
||||||
@ -362,7 +360,16 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
|
|||||||
|
|
||||||
if (pbi->common.filter_level)
|
if (pbi->common.filter_level)
|
||||||
{
|
{
|
||||||
int skip_lf;
|
int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
|
||||||
|
xd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||||
|
xd->mode_info_context->mbmi.mb_skip_coeff);
|
||||||
|
|
||||||
|
const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
|
||||||
|
const int seg = xd->mode_info_context->mbmi.segment_id;
|
||||||
|
const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
|
||||||
|
|
||||||
|
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||||
|
|
||||||
if( mb_row != pc->mb_rows-1 )
|
if( mb_row != pc->mb_rows-1 )
|
||||||
{
|
{
|
||||||
/* Save decoded MB last row data for next-row decoding */
|
/* Save decoded MB last row data for next-row decoding */
|
||||||
@ -388,35 +395,57 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update loopfilter info */
|
|
||||||
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
|
|
||||||
skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
|
|
||||||
xd->mode_info_context->mbmi.mode != SPLITMV &&
|
|
||||||
xd->mode_info_context->mbmi.mb_skip_coeff);
|
|
||||||
|
|
||||||
filter_level = pbi->mt_baseline_filter_level[Segment];
|
|
||||||
/* Distance of Mb to the various image edges.
|
|
||||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
|
||||||
* Apply any context driven MB level adjustment
|
|
||||||
*/
|
|
||||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
|
||||||
|
|
||||||
/* loopfilter on this macroblock. */
|
/* loopfilter on this macroblock. */
|
||||||
if (filter_level)
|
if (filter_level)
|
||||||
{
|
{
|
||||||
if (mb_col > 0)
|
if(pc->filter_type == NORMAL_LOOPFILTER)
|
||||||
pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
{
|
||||||
|
loop_filter_info lfi;
|
||||||
|
FRAME_TYPE frame_type = pc->frame_type;
|
||||||
|
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||||
|
lfi.mblim = lfi_n->mblim[filter_level];
|
||||||
|
lfi.blim = lfi_n->blim[filter_level];
|
||||||
|
lfi.lim = lfi_n->lim[filter_level];
|
||||||
|
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||||
|
|
||||||
if (!skip_lf)
|
if (mb_col > 0)
|
||||||
pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v)
|
||||||
|
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||||
|
|
||||||
/* don't apply across umv border */
|
if (!skip_lf)
|
||||||
if (mb_row > 0)
|
LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v)
|
||||||
pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||||
|
|
||||||
if (!skip_lf)
|
/* don't apply across umv border */
|
||||||
pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
if (mb_row > 0)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h)
|
||||||
|
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h)
|
||||||
|
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (mb_col > 0)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v)
|
||||||
|
(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v)
|
||||||
|
(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
|
||||||
|
|
||||||
|
/* don't apply across umv border */
|
||||||
|
if (mb_row > 0)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h)
|
||||||
|
(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h)
|
||||||
|
(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
recon_yoffset += 16;
|
recon_yoffset += 16;
|
||||||
@ -681,53 +710,6 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
|
|
||||||
{
|
|
||||||
VP8_COMMON *cm = &pbi->common;
|
|
||||||
MACROBLOCKD *mbd = &pbi->mb;
|
|
||||||
/*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
|
|
||||||
loop_filter_info *lfi = cm->lf_info;
|
|
||||||
FRAME_TYPE frame_type = cm->frame_type;
|
|
||||||
|
|
||||||
/*int mb_row;
|
|
||||||
int mb_col;
|
|
||||||
int baseline_filter_level[MAX_MB_SEGMENTS];*/
|
|
||||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
|
||||||
|
|
||||||
int i;
|
|
||||||
/*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
|
|
||||||
|
|
||||||
/* Note the baseline filter values for each segment */
|
|
||||||
if (alt_flt_enabled)
|
|
||||||
{
|
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
|
||||||
{
|
|
||||||
/* Abs value */
|
|
||||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
|
||||||
pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
|
||||||
/* Delta Value */
|
|
||||||
else
|
|
||||||
{
|
|
||||||
pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
|
||||||
pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
|
||||||
pbi->mt_baseline_filter_level[i] = default_filt_lvl;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize the loop filter for this frame. */
|
|
||||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
|
||||||
vp8_init_loop_filter(cm);
|
|
||||||
else if (frame_type != cm->last_frame_type)
|
|
||||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||||
{
|
{
|
||||||
int mb_row;
|
int mb_row;
|
||||||
@ -738,12 +720,10 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
|||||||
volatile int *last_row_current_mb_col = NULL;
|
volatile int *last_row_current_mb_col = NULL;
|
||||||
int nsync = pbi->sync_range;
|
int nsync = pbi->sync_range;
|
||||||
|
|
||||||
int filter_level;
|
int filter_level = pc->filter_level;
|
||||||
loop_filter_info *lfi = pc->lf_info;
|
loop_filter_info_n *lfi_n = &pc->lf_info;
|
||||||
int alt_flt_enabled = xd->segmentation_enabled;
|
|
||||||
int Segment;
|
|
||||||
|
|
||||||
if(pbi->common.filter_level)
|
if (filter_level)
|
||||||
{
|
{
|
||||||
/* Set above_row buffer to 127 for decoding first MB row */
|
/* Set above_row buffer to 127 for decoding first MB row */
|
||||||
vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
|
vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
|
||||||
@ -764,7 +744,9 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
|||||||
vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
|
vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
|
||||||
vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
|
vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
|
||||||
}
|
}
|
||||||
lpf_init(pbi, pc->filter_level);
|
|
||||||
|
/* Initialize the loop filter for this frame. */
|
||||||
|
vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level, pc->sharpness_level);
|
||||||
}
|
}
|
||||||
|
|
||||||
setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
|
setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
|
||||||
@ -774,7 +756,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
|||||||
|
|
||||||
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
|
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
|
||||||
{
|
{
|
||||||
|
|
||||||
xd->current_bc = &pbi->mbc[mb_row%num_part];
|
xd->current_bc = &pbi->mbc[mb_row%num_part];
|
||||||
|
|
||||||
/* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
|
/* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
|
||||||
@ -875,7 +856,16 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
|||||||
|
|
||||||
if (pbi->common.filter_level)
|
if (pbi->common.filter_level)
|
||||||
{
|
{
|
||||||
int skip_lf;
|
int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
|
||||||
|
xd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||||
|
xd->mode_info_context->mbmi.mb_skip_coeff);
|
||||||
|
|
||||||
|
const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
|
||||||
|
const int seg = xd->mode_info_context->mbmi.segment_id;
|
||||||
|
const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
|
||||||
|
|
||||||
|
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||||
|
|
||||||
/* Save decoded MB last row data for next-row decoding */
|
/* Save decoded MB last row data for next-row decoding */
|
||||||
if(mb_row != pc->mb_rows-1)
|
if(mb_row != pc->mb_rows-1)
|
||||||
{
|
{
|
||||||
@ -901,36 +891,58 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update loopfilter info */
|
|
||||||
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
|
|
||||||
skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
|
|
||||||
xd->mode_info_context->mbmi.mode != SPLITMV &&
|
|
||||||
xd->mode_info_context->mbmi.mb_skip_coeff);
|
|
||||||
filter_level = pbi->mt_baseline_filter_level[Segment];
|
|
||||||
/* Distance of Mb to the various image edges.
|
|
||||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
|
||||||
* Apply any context driven MB level adjustment
|
|
||||||
*/
|
|
||||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
|
||||||
|
|
||||||
/* loopfilter on this macroblock. */
|
/* loopfilter on this macroblock. */
|
||||||
if (filter_level)
|
if (filter_level)
|
||||||
{
|
{
|
||||||
if (mb_col > 0)
|
if(pc->filter_type == NORMAL_LOOPFILTER)
|
||||||
pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
{
|
||||||
|
loop_filter_info lfi;
|
||||||
|
FRAME_TYPE frame_type = pc->frame_type;
|
||||||
|
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||||
|
lfi.mblim = lfi_n->mblim[filter_level];
|
||||||
|
lfi.blim = lfi_n->blim[filter_level];
|
||||||
|
lfi.lim = lfi_n->lim[filter_level];
|
||||||
|
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||||
|
|
||||||
if (!skip_lf)
|
if (mb_col > 0)
|
||||||
pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v)
|
||||||
|
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||||
|
|
||||||
/* don't apply across umv border */
|
if (!skip_lf)
|
||||||
if (mb_row > 0)
|
LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v)
|
||||||
pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||||
|
|
||||||
if (!skip_lf)
|
/* don't apply across umv border */
|
||||||
pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
if (mb_row > 0)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h)
|
||||||
|
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h)
|
||||||
|
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (mb_col > 0)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v)
|
||||||
|
(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v)
|
||||||
|
(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
|
||||||
|
|
||||||
|
/* don't apply across umv border */
|
||||||
|
if (mb_row > 0)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h)
|
||||||
|
(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
|
||||||
|
|
||||||
|
if (!skip_lf)
|
||||||
|
LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h)
|
||||||
|
(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
}
|
||||||
recon_yoffset += 16;
|
recon_yoffset += 16;
|
||||||
recon_uvoffset += 8;
|
recon_uvoffset += 8;
|
||||||
|
|
||||||
|
@ -2174,7 +2174,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
|||||||
//when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame.
|
//when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame.
|
||||||
vp8cx_init_quantizer(cpi);
|
vp8cx_init_quantizer(cpi);
|
||||||
{
|
{
|
||||||
vp8_init_loop_filter(cm);
|
vp8_loop_filter_init(cm);
|
||||||
cm->last_frame_type = KEY_FRAME;
|
cm->last_frame_type = KEY_FRAME;
|
||||||
cm->last_filter_type = cm->filter_type;
|
cm->last_filter_type = cm->filter_type;
|
||||||
cm->last_sharpness_level = cm->sharpness_level;
|
cm->last_sharpness_level = cm->sharpness_level;
|
||||||
|
@ -262,10 +262,19 @@ static void vp8_temporal_filter_iterate_c
|
|||||||
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
||||||
{
|
{
|
||||||
#if ALT_REF_MC_ENABLED
|
#if ALT_REF_MC_ENABLED
|
||||||
// Reduced search extent by 3 for 6-tap filter & smaller UMV border
|
// Source frames are extended to 16 pixels. This is different than
|
||||||
cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
|
// L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS)
|
||||||
|
// A 6 tap filter is used for motion search. This requires 2 pixels
|
||||||
|
// before and 3 pixels after. So the largest Y mv on a border would
|
||||||
|
// then be 16 - 3. The UV blocks are half the size of the Y and
|
||||||
|
// therefore only extended by 8. The largest mv that a UV block
|
||||||
|
// can support is 8 - 3. A UV mv is half of a Y mv.
|
||||||
|
// (16 - 3) >> 1 == 6 which is greater than 8 - 3.
|
||||||
|
// To keep the mv in play for both Y and UV planes the max that it
|
||||||
|
// can be on a border is therefore 16 - 5.
|
||||||
|
cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5));
|
||||||
cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
|
cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
|
||||||
+ (VP8BORDERINPIXELS - 19);
|
+ (16 - 5);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||||
@ -277,10 +286,9 @@ static void vp8_temporal_filter_iterate_c
|
|||||||
vpx_memset(count, 0, 384*sizeof(unsigned short));
|
vpx_memset(count, 0, 384*sizeof(unsigned short));
|
||||||
|
|
||||||
#if ALT_REF_MC_ENABLED
|
#if ALT_REF_MC_ENABLED
|
||||||
// Reduced search extent by 3 for 6-tap filter & smaller UMV border
|
cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
|
||||||
cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
|
|
||||||
cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
|
cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
|
||||||
+ (VP8BORDERINPIXELS - 19);
|
+ (16 - 5);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (frame = 0; frame < frame_count; frame++)
|
for (frame = 0; frame < frame_count; frame++)
|
||||||
|
@ -1,30 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Use of this source code is governed by a BSD-style license
|
|
||||||
* that can be found in the LICENSE file in the root of the source
|
|
||||||
* tree. An additional intellectual property rights grant can be found
|
|
||||||
* in the file PATENTS. All contributing project authors may
|
|
||||||
* be found in the AUTHORS file in the root of the source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __VPX_MEM_NDS_H__
|
|
||||||
#define __VPX_MEM_NDS_H__
|
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <nitro.h>
|
|
||||||
#include <nitro/os.h>
|
|
||||||
|
|
||||||
void *vpx_mem_nds_alloc(osarena_id id, osheap_handle handle, size_t size, size_t align);
|
|
||||||
void vpx_mem_nds_free(osarena_id id, osheap_handle handle, void *mem);
|
|
||||||
int vpx_nds_alloc_heap(osarena_id id, u32 size);
|
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /*__VPX_MEM_NDS_H__*/
|
|
@ -36,9 +36,6 @@
|
|||||||
# include <winbase.h>
|
# include <winbase.h>
|
||||||
#elif defined(VXWORKS)
|
#elif defined(VXWORKS)
|
||||||
# include <sem_lib.h>
|
# include <sem_lib.h>
|
||||||
#elif defined(NDS_NITRO)
|
|
||||||
# include <nitro.h>
|
|
||||||
# include <nitro/os.h>
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -112,8 +109,6 @@ struct memory_tracker
|
|||||||
HANDLE mutex;
|
HANDLE mutex;
|
||||||
#elif defined(VXWORKS)
|
#elif defined(VXWORKS)
|
||||||
SEM_ID mutex;
|
SEM_ID mutex;
|
||||||
#elif defined(NDS_NITRO)
|
|
||||||
OSMutex mutex;
|
|
||||||
#elif defined(NO_MUTEX)
|
#elif defined(NO_MUTEX)
|
||||||
#else
|
#else
|
||||||
#error "No mutex type defined for this platform!"
|
#error "No mutex type defined for this platform!"
|
||||||
@ -193,9 +188,6 @@ int vpx_memory_tracker_init(int padding_size, int pad_value)
|
|||||||
memtrack.mutex = sem_bcreate(SEM_Q_FIFO, /*SEM_Q_FIFO non-priority based mutex*/
|
memtrack.mutex = sem_bcreate(SEM_Q_FIFO, /*SEM_Q_FIFO non-priority based mutex*/
|
||||||
SEM_FULL); /*SEM_FULL initial state is unlocked*/
|
SEM_FULL); /*SEM_FULL initial state is unlocked*/
|
||||||
ret = !memtrack.mutex;
|
ret = !memtrack.mutex;
|
||||||
#elif defined(NDS_NITRO)
|
|
||||||
os_init_mutex(&memtrack.mutex);
|
|
||||||
ret = 0;
|
|
||||||
#elif defined(NO_MUTEX)
|
#elif defined(NO_MUTEX)
|
||||||
ret = 0;
|
ret = 0;
|
||||||
#endif
|
#endif
|
||||||
@ -251,9 +243,7 @@ void vpx_memory_tracker_destroy()
|
|||||||
|
|
||||||
if (!g_logging.type && g_logging.file && g_logging.file != stderr)
|
if (!g_logging.type && g_logging.file && g_logging.file != stderr)
|
||||||
{
|
{
|
||||||
#if !defined(NDS_NITRO)
|
|
||||||
fclose(g_logging.file);
|
fclose(g_logging.file);
|
||||||
#endif
|
|
||||||
g_logging.file = NULL;
|
g_logging.file = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -368,15 +358,12 @@ int vpx_memory_tracker_set_log_type(int type, char *option)
|
|||||||
g_logging.file = stderr;
|
g_logging.file = stderr;
|
||||||
ret = 0;
|
ret = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(NDS_NITRO)
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ((g_logging.file = fopen((char *)option, "w")))
|
if ((g_logging.file = fopen((char *)option, "w")))
|
||||||
ret = 0;
|
ret = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
#if defined(WIN32) && !defined(_WIN32_WCE)
|
#if defined(WIN32) && !defined(_WIN32_WCE)
|
||||||
case 1:
|
case 1:
|
||||||
@ -506,12 +493,6 @@ static void memory_tracker_dump()
|
|||||||
p->addr, i, p->size,
|
p->addr, i, p->size,
|
||||||
p->file, p->line);
|
p->file, p->line);
|
||||||
|
|
||||||
#ifdef NDS_NITRO
|
|
||||||
|
|
||||||
if (!(i % 20)) os_sleep(500);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
p = p->next;
|
p = p->next;
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
@ -719,9 +700,6 @@ static int memory_tracker_lock_mutex()
|
|||||||
ret = WaitForSingleObject(memtrack.mutex, INFINITE);
|
ret = WaitForSingleObject(memtrack.mutex, INFINITE);
|
||||||
#elif defined(VXWORKS)
|
#elif defined(VXWORKS)
|
||||||
ret = sem_take(memtrack.mutex, WAIT_FOREVER);
|
ret = sem_take(memtrack.mutex, WAIT_FOREVER);
|
||||||
#elif defined(NDS_NITRO)
|
|
||||||
os_lock_mutex(&memtrack.mutex);
|
|
||||||
ret = 0;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
@ -754,9 +732,6 @@ static int memory_tracker_unlock_mutex()
|
|||||||
ret = !ReleaseMutex(memtrack.mutex);
|
ret = !ReleaseMutex(memtrack.mutex);
|
||||||
#elif defined(VXWORKS)
|
#elif defined(VXWORKS)
|
||||||
ret = sem_give(memtrack.mutex);
|
ret = sem_give(memtrack.mutex);
|
||||||
#elif defined(NDS_NITRO)
|
|
||||||
os_unlock_mutex(&memtrack.mutex);
|
|
||||||
ret = 0;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
|
@ -1,221 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Use of this source code is governed by a BSD-style license
|
|
||||||
* that can be found in the LICENSE file in the root of the source
|
|
||||||
* tree. An additional intellectual property rights grant can be found
|
|
||||||
* in the file PATENTS. All contributing project authors may
|
|
||||||
* be found in the AUTHORS file in the root of the source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************
|
|
||||||
*
|
|
||||||
* Module Title : yv12extend.c
|
|
||||||
*
|
|
||||||
* Description :
|
|
||||||
*
|
|
||||||
***************************************************************************/
|
|
||||||
|
|
||||||
/****************************************************************************
|
|
||||||
* Header Files
|
|
||||||
****************************************************************************/
|
|
||||||
#include "vpx_scale/yv12config.h"
|
|
||||||
#include "vpx_mem/vpx_mem.h"
|
|
||||||
#include <nitro.h>
|
|
||||||
#include <nitro/mi.h>
|
|
||||||
#include <nitro/itcm_begin.h>
|
|
||||||
|
|
||||||
//---- DMA Number
|
|
||||||
#define DMA_NO 3
|
|
||||||
|
|
||||||
/****************************************************************************
|
|
||||||
* Exports
|
|
||||||
****************************************************************************/
|
|
||||||
|
|
||||||
/****************************************************************************
|
|
||||||
*
|
|
||||||
****************************************************************************/
|
|
||||||
void
|
|
||||||
vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
unsigned char *src_ptr1, *src_ptr2;
|
|
||||||
unsigned char *dest_ptr1, *dest_ptr2;
|
|
||||||
|
|
||||||
unsigned int Border;
|
|
||||||
int plane_stride;
|
|
||||||
int plane_height;
|
|
||||||
int plane_width;
|
|
||||||
|
|
||||||
/***********/
|
|
||||||
/* Y Plane */
|
|
||||||
/***********/
|
|
||||||
Border = ybf->border;
|
|
||||||
plane_stride = ybf->y_stride;
|
|
||||||
plane_height = ybf->y_height;
|
|
||||||
plane_width = ybf->y_width;
|
|
||||||
|
|
||||||
// copy the left and right most columns out
|
|
||||||
src_ptr1 = ybf->y_buffer;
|
|
||||||
src_ptr2 = src_ptr1 + plane_width - 1;
|
|
||||||
dest_ptr1 = src_ptr1 - Border;
|
|
||||||
dest_ptr2 = src_ptr2 + 1;
|
|
||||||
|
|
||||||
for (i = 0; i < plane_height; i++)
|
|
||||||
{
|
|
||||||
mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
|
|
||||||
mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
|
|
||||||
src_ptr1 += plane_stride;
|
|
||||||
src_ptr2 += plane_stride;
|
|
||||||
dest_ptr1 += plane_stride;
|
|
||||||
dest_ptr2 += plane_stride;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now copy the top and bottom source lines into each line of the respective borders
|
|
||||||
src_ptr1 = ybf->y_buffer - Border;
|
|
||||||
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
|
|
||||||
dest_ptr1 = src_ptr1 - (Border * plane_stride);
|
|
||||||
dest_ptr2 = src_ptr2 + plane_stride;
|
|
||||||
|
|
||||||
for (i = 0; i < (int)Border; i++)
|
|
||||||
{
|
|
||||||
mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
|
|
||||||
mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
|
|
||||||
dest_ptr1 += plane_stride;
|
|
||||||
dest_ptr2 += plane_stride;
|
|
||||||
}
|
|
||||||
|
|
||||||
plane_stride /= 2;
|
|
||||||
plane_height /= 2;
|
|
||||||
plane_width /= 2;
|
|
||||||
Border /= 2;
|
|
||||||
|
|
||||||
/***********/
|
|
||||||
/* U Plane */
|
|
||||||
/***********/
|
|
||||||
|
|
||||||
// copy the left and right most columns out
|
|
||||||
src_ptr1 = ybf->u_buffer;
|
|
||||||
src_ptr2 = src_ptr1 + plane_width - 1;
|
|
||||||
dest_ptr1 = src_ptr1 - Border;
|
|
||||||
dest_ptr2 = src_ptr2 + 1;
|
|
||||||
|
|
||||||
for (i = 0; i < plane_height; i++)
|
|
||||||
{
|
|
||||||
mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
|
|
||||||
mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
|
|
||||||
src_ptr1 += plane_stride;
|
|
||||||
src_ptr2 += plane_stride;
|
|
||||||
dest_ptr1 += plane_stride;
|
|
||||||
dest_ptr2 += plane_stride;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now copy the top and bottom source lines into each line of the respective borders
|
|
||||||
src_ptr1 = ybf->u_buffer - Border;
|
|
||||||
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
|
|
||||||
dest_ptr1 = src_ptr1 - (Border * plane_stride);
|
|
||||||
dest_ptr2 = src_ptr2 + plane_stride;
|
|
||||||
|
|
||||||
for (i = 0; i < (int)(Border); i++)
|
|
||||||
{
|
|
||||||
mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
|
|
||||||
mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
|
|
||||||
dest_ptr1 += plane_stride;
|
|
||||||
dest_ptr2 += plane_stride;
|
|
||||||
}
|
|
||||||
|
|
||||||
/***********/
|
|
||||||
/* V Plane */
|
|
||||||
/***********/
|
|
||||||
|
|
||||||
// copy the left and right most columns out
|
|
||||||
src_ptr1 = ybf->v_buffer;
|
|
||||||
src_ptr2 = src_ptr1 + plane_width - 1;
|
|
||||||
dest_ptr1 = src_ptr1 - Border;
|
|
||||||
dest_ptr2 = src_ptr2 + 1;
|
|
||||||
|
|
||||||
for (i = 0; i < plane_height; i++)
|
|
||||||
{
|
|
||||||
mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
|
|
||||||
mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
|
|
||||||
src_ptr1 += plane_stride;
|
|
||||||
src_ptr2 += plane_stride;
|
|
||||||
dest_ptr1 += plane_stride;
|
|
||||||
dest_ptr2 += plane_stride;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now copy the top and bottom source lines into each line of the respective borders
|
|
||||||
src_ptr1 = ybf->v_buffer - Border;
|
|
||||||
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
|
|
||||||
dest_ptr1 = src_ptr1 - (Border * plane_stride);
|
|
||||||
dest_ptr2 = src_ptr2 + plane_stride;
|
|
||||||
|
|
||||||
for (i = 0; i < (int)(Border); i++)
|
|
||||||
{
|
|
||||||
mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
|
|
||||||
mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
|
|
||||||
dest_ptr1 += plane_stride;
|
|
||||||
dest_ptr2 += plane_stride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************
|
|
||||||
*
|
|
||||||
* ROUTINE : vp8_yv12_copy_frame
|
|
||||||
*
|
|
||||||
* INPUTS :
|
|
||||||
*
|
|
||||||
* OUTPUTS : None.
|
|
||||||
*
|
|
||||||
* RETURNS : void
|
|
||||||
*
|
|
||||||
* FUNCTION : Copies the source image into the destination image and
|
|
||||||
* updates the destination's UMV borders.
|
|
||||||
*
|
|
||||||
* SPECIAL NOTES : The frames are assumed to be identical in size.
|
|
||||||
*
|
|
||||||
****************************************************************************/
|
|
||||||
void
|
|
||||||
vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
|
|
||||||
{
|
|
||||||
int yplane_size = (src_ybc->y_height + 2 * src_ybc->border) * (src_ybc->y_stride);
|
|
||||||
int mem_size = (yplane_size * 3 / 2) + (src_ybc->y_stride * 2);
|
|
||||||
|
|
||||||
mi_cpu_copy_fast(src_ybc->buffer_alloc, dst_ybc->buffer_alloc, mem_size);
|
|
||||||
|
|
||||||
/* unsigned char *src_y, *dst_y;
|
|
||||||
unsigned char *src_u, *dst_u;
|
|
||||||
unsigned char *src_v, *dst_v;
|
|
||||||
|
|
||||||
int yheight, uv_height;
|
|
||||||
int ystride, uv_stride;
|
|
||||||
int border;
|
|
||||||
int yoffset, uvoffset;
|
|
||||||
|
|
||||||
border = src_ybc->border;
|
|
||||||
yheight = src_ybc->y_height;
|
|
||||||
uv_height = src_ybc->uv_height;
|
|
||||||
|
|
||||||
ystride = src_ybc->y_stride;
|
|
||||||
uv_stride = src_ybc->uv_stride;
|
|
||||||
|
|
||||||
yoffset = border * (ystride + 1);
|
|
||||||
uvoffset = border/2 * (uv_stride + 1);
|
|
||||||
|
|
||||||
src_y = src_ybc->y_buffer - yoffset;
|
|
||||||
dst_y = dst_ybc->y_buffer - yoffset;
|
|
||||||
src_u = src_ybc->u_buffer - uvoffset;
|
|
||||||
dst_u = dst_ybc->u_buffer - uvoffset;
|
|
||||||
src_v = src_ybc->v_buffer - uvoffset;
|
|
||||||
dst_v = dst_ybc->v_buffer - uvoffset;
|
|
||||||
|
|
||||||
mi_cpu_copy_fast (src_y, dst_y, ystride * (yheight + 2 * border));
|
|
||||||
mi_cpu_copy_fast (src_u, dst_u, uv_stride * (uv_height + border));
|
|
||||||
mi_cpu_copy_fast (src_v, dst_v, uv_stride * (uv_height + border));
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
#include <nitro/itcm_end.h>
|
|
@ -24,9 +24,12 @@ vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf)
|
|||||||
{
|
{
|
||||||
if (ybf)
|
if (ybf)
|
||||||
{
|
{
|
||||||
duck_free(ybf->buffer_alloc);
|
vpx_free(ybf->buffer_alloc);
|
||||||
|
|
||||||
ybf->buffer_alloc = 0;
|
/* buffer_alloc isn't accessed by most functions. Rather y_buffer,
|
||||||
|
u_buffer and v_buffer point to buffer_alloc and are used. Clear out
|
||||||
|
all of this so that a freed pointer isn't inadvertently used */
|
||||||
|
vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -44,38 +47,37 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int
|
|||||||
{
|
{
|
||||||
/*NOTE:*/
|
/*NOTE:*/
|
||||||
|
|
||||||
int yplane_size = (height + 2 * border) * (width + 2 * border);
|
|
||||||
int uvplane_size = ((1 + height) / 2 + border) * ((1 + width) / 2 + border);
|
|
||||||
|
|
||||||
if (ybf)
|
if (ybf)
|
||||||
{
|
{
|
||||||
|
int uv_width = width >> 1;
|
||||||
|
int uv_height = height >> 1;
|
||||||
|
int yplane_size = (height + 2 * border) * (width + 2 * border);
|
||||||
|
int uvplane_size = (uv_height + border) * (uv_width + border);
|
||||||
|
|
||||||
vp8_yv12_de_alloc_frame_buffer(ybf);
|
vp8_yv12_de_alloc_frame_buffer(ybf);
|
||||||
|
|
||||||
|
/* only support allocating buffers that have
|
||||||
|
a height and width that are multiples of 16 */
|
||||||
|
if ((width & 0xf) | (height & 0xf))
|
||||||
|
return -3;
|
||||||
|
|
||||||
ybf->y_width = width;
|
ybf->y_width = width;
|
||||||
ybf->y_height = height;
|
ybf->y_height = height;
|
||||||
ybf->y_stride = width + 2 * border;
|
ybf->y_stride = width + 2 * border;
|
||||||
|
|
||||||
ybf->uv_width = (1 + width) / 2;
|
ybf->uv_width = uv_width;
|
||||||
ybf->uv_height = (1 + height) / 2;
|
ybf->uv_height = uv_height;
|
||||||
ybf->uv_stride = ybf->uv_width + border;
|
ybf->uv_stride = uv_width + border;
|
||||||
|
|
||||||
ybf->border = border;
|
ybf->border = border;
|
||||||
ybf->frame_size = yplane_size + 2 * uvplane_size;
|
ybf->frame_size = yplane_size + 2 * uvplane_size;
|
||||||
|
|
||||||
/* Added 2 extra lines to framebuffer so that copy12x12 doesn't fail
|
ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
|
||||||
* when we have a large motion vector in V on the last v block.
|
|
||||||
* Note : We never use these pixels anyway so this doesn't hurt.
|
|
||||||
*/
|
|
||||||
ybf->buffer_alloc = (unsigned char *) duck_memalign(32, ybf->frame_size + (ybf->y_stride * 2) + 32, 0);
|
|
||||||
|
|
||||||
if (ybf->buffer_alloc == NULL)
|
if (ybf->buffer_alloc == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
ybf->y_buffer = ybf->buffer_alloc + (border * ybf->y_stride) + border;
|
ybf->y_buffer = ybf->buffer_alloc + (border * ybf->y_stride) + border;
|
||||||
|
|
||||||
if (yplane_size & 0xf)
|
|
||||||
yplane_size += 16 - (yplane_size & 0xf);
|
|
||||||
|
|
||||||
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * ybf->uv_stride) + border / 2;
|
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * ybf->uv_stride) + border / 2;
|
||||||
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * ybf->uv_stride) + border / 2;
|
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * ybf->uv_stride) + border / 2;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user