Merge remote branch 'internal/upstream' into HEAD
This commit is contained in:
commit
6901105e99
@ -54,9 +54,11 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
|
||||
rtcd->loopfilter.simple_mb_v =
|
||||
vp8_loop_filter_simple_vertical_edge_armv6;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
|
||||
rtcd->loopfilter.simple_mb_h =
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
|
||||
|
||||
rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
|
||||
|
@ -53,14 +53,11 @@ count RN r5
|
||||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *flimit,
|
||||
;r2 const char *blimit,
|
||||
;r3 const char *limit,
|
||||
;stack const char *thresh,
|
||||
;stack int count
|
||||
|
||||
;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
@ -72,14 +69,18 @@ count RN r5
|
||||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r9, [src], pstep ; p3
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r10, [src], pstep ; p2
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r11, [src], pstep ; p1
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r6], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r6] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|Hnext8|
|
||||
; vp8_filter_mask() function
|
||||
@ -275,14 +276,18 @@ count RN r5
|
||||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r9, [src], pstep ; p3
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r10, [src], pstep ; p2
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r11, [src], pstep ; p1
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r6], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r6] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|MBHnext8|
|
||||
|
||||
@ -584,15 +589,19 @@ count RN r5
|
||||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r6, [src], pstep ; load source data
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r7, [src], pstep
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r8, [src], pstep
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r12], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r12] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
ldr lr, [src], pstep
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|Vnext8|
|
||||
|
||||
@ -855,18 +864,22 @@ count RN r5
|
||||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r6, [src], pstep ; load source data
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
pld [src, #23]
|
||||
ldr r7, [src], pstep
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
pld [src, #23]
|
||||
ldr r8, [src], pstep
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r12], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r12] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
pld [src, #23]
|
||||
ldr lr, [src], pstep
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|MBVnext8|
|
||||
; vp8_filter_mask() function
|
||||
@ -906,6 +919,7 @@ count RN r5
|
||||
str lr, [sp, #8]
|
||||
ldr lr, [src], pstep
|
||||
|
||||
|
||||
TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12
|
||||
|
||||
ldr lr, [sp, #8] ; load back (f)limit accumulator
|
||||
@ -954,6 +968,7 @@ count RN r5
|
||||
beq mbvskip_filter ; skip filtering
|
||||
|
||||
|
||||
|
||||
;vp8_hevmask() function
|
||||
;calculate high edge variance
|
||||
|
||||
@ -1121,6 +1136,7 @@ count RN r5
|
||||
smlabb r8, r6, lr, r7
|
||||
smlatb r6, r6, lr, r7
|
||||
smlabb r9, r10, lr, r7
|
||||
|
||||
smlatb r10, r10, lr, r7
|
||||
ssat r8, #8, r8, asr #7
|
||||
ssat r6, #8, r6, asr #7
|
||||
|
@ -45,35 +45,28 @@
|
||||
MEND
|
||||
|
||||
|
||||
|
||||
src RN r0
|
||||
pstep RN r1
|
||||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *flimit,
|
||||
;r3 const char *limit,
|
||||
;stack const char *thresh,
|
||||
;stack int count
|
||||
|
||||
; All 16 elements in flimit are equal. So, in the code, only one load is needed
|
||||
; for flimit. Same applies to limit. thresh is not used in simple looopfilter
|
||||
;r2 const char *blimit
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r12, [r3] ; limit
|
||||
ldrb r12, [r2] ; blimit
|
||||
ldr r3, [src, -pstep, lsl #1] ; p1
|
||||
ldr r4, [src, -pstep] ; p0
|
||||
ldr r5, [src] ; q0
|
||||
ldr r6, [src, pstep] ; q1
|
||||
ldr r7, [r2] ; flimit
|
||||
orr r12, r12, r12, lsl #8 ; blimit
|
||||
ldr r2, c0x80808080
|
||||
ldr r9, [sp, #40] ; count for 8-in-parallel
|
||||
uadd8 r7, r7, r7 ; flimit * 2
|
||||
mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
|
||||
uadd8 r12, r7, r12 ; flimit * 2 + limit
|
||||
orr r12, r12, r12, lsl #16 ; blimit
|
||||
mov r9, #4 ; double the count. we're doing 4 at a time
|
||||
mov lr, #0 ; need 0 in a couple places
|
||||
|
||||
|simple_hnext8|
|
||||
@ -148,34 +141,32 @@ pstep RN r1
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r12, [r2] ; r12: flimit
|
||||
ldrb r12, [r2] ; r12: blimit
|
||||
ldr r2, c0x80808080
|
||||
ldr r7, [r3] ; limit
|
||||
orr r12, r12, r12, lsl #8
|
||||
|
||||
; load soure data to r7, r8, r9, r10
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrh r4, [src], pstep
|
||||
uadd8 r12, r12, r12 ; flimit * 2
|
||||
orr r12, r12, r12, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
uadd8 r12, r12, r7 ; flimit * 2 + limit
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r4, [src], pstep
|
||||
ldr r11, [sp, #40] ; count (r11) for 8-in-parallel
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
mov r11, r11, lsl #1 ; 4-in-parallel
|
||||
mov r11, #4 ; double the count. we're doing 4 at a time
|
||||
|
||||
|simple_vnext8|
|
||||
; vp8_simple_filter_mask() function
|
||||
|
@ -9,30 +9,34 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <math.h>
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
#if HAVE_ARMV6
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
||||
#endif
|
||||
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_neon);
|
||||
#if HAVE_ARMV7
|
||||
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh);
|
||||
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh,
|
||||
unsigned char *v);
|
||||
|
||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
|
||||
|
||||
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV6
|
||||
/*ARMV6 loopfilter functions*/
|
||||
@ -40,96 +44,72 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -139,83 +119,58 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
|
||||
}
|
||||
#endif
|
||||
|
@ -12,15 +12,17 @@
|
||||
#ifndef LOOPFILTER_ARM_H
|
||||
#define LOOPFILTER_ARM_H
|
||||
|
||||
#include "vpx_config.h"
|
||||
|
||||
#if HAVE_ARMV6
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_lf_normal_mb_v
|
||||
@ -36,28 +38,29 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
|
||||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6
|
||||
|
||||
#undef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_armv6
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_armv6
|
||||
|
||||
#undef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
|
||||
|
||||
#undef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_armv6
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_armv6
|
||||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
|
||||
#endif
|
||||
#endif
|
||||
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
|
||||
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_mbvs_neon);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_neon);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_mbhs_neon);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_neon);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_lf_normal_mb_v
|
||||
@ -83,7 +86,8 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
|
||||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
|
||||
#endif
|
||||
#endif
|
||||
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
|
||||
|
||||
#endif
|
||||
#endif /* HAVE_ARMV7 */
|
||||
|
||||
#endif /* LOOPFILTER_ARM_H */
|
||||
|
@ -14,109 +14,97 @@
|
||||
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
|
||||
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; flimit, limit, and thresh should be positive numbers.
|
||||
; All 16 elements in these variables are equal.
|
||||
|
||||
; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; r0 unsigned char *src
|
||||
; r1 int pitch
|
||||
; r2 const signed char *flimit
|
||||
; r3 const signed char *limit
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
|vp8_loop_filter_horizontal_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
push {lr}
|
||||
vdup.u8 q0, r2 ; duplicate blimit
|
||||
vdup.u8 q1, r3 ; duplicate limit
|
||||
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
||||
ldr r3, [sp, #4] ; load thresh
|
||||
add r12, r2, r1
|
||||
add r1, r1, r1
|
||||
|
||||
vld1.u8 {q3}, [r2], r1 ; p3
|
||||
vld1.u8 {q4}, [r2], r1 ; p2
|
||||
vld1.u8 {q5}, [r2], r1 ; p1
|
||||
vld1.u8 {q6}, [r2], r1 ; p0
|
||||
vld1.u8 {q7}, [r2], r1 ; q0
|
||||
vld1.u8 {q8}, [r2], r1 ; q1
|
||||
vld1.u8 {q9}, [r2], r1 ; q2
|
||||
vld1.u8 {q10}, [r2] ; q3
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
sub r0, r0, r1, lsl #1
|
||||
vdup.u8 q2, r3 ; duplicate thresh
|
||||
|
||||
vld1.u8 {q3}, [r2@128], r1 ; p3
|
||||
vld1.u8 {q4}, [r12@128], r1 ; p2
|
||||
vld1.u8 {q5}, [r2@128], r1 ; p1
|
||||
vld1.u8 {q6}, [r12@128], r1 ; p0
|
||||
vld1.u8 {q7}, [r2@128], r1 ; q0
|
||||
vld1.u8 {q8}, [r12@128], r1 ; q1
|
||||
vld1.u8 {q9}, [r2@128] ; q2
|
||||
vld1.u8 {q10}, [r12@128] ; q3
|
||||
|
||||
sub r2, r2, r1, lsl #1
|
||||
sub r12, r12, r1, lsl #1
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
vst1.u8 {q5}, [r0], r1 ; store op1
|
||||
vst1.u8 {q6}, [r0], r1 ; store op0
|
||||
vst1.u8 {q7}, [r0], r1 ; store oq0
|
||||
vst1.u8 {q8}, [r0], r1 ; store oq1
|
||||
vst1.u8 {q5}, [r2@128], r1 ; store op1
|
||||
vst1.u8 {q6}, [r12@128], r1 ; store op0
|
||||
vst1.u8 {q7}, [r2@128], r1 ; store oq0
|
||||
vst1.u8 {q8}, [r12@128], r1 ; store oq1
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
|
||||
; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; unsigned char *v)
|
||||
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
push {lr}
|
||||
vdup.u8 q0, r2 ; duplicate blimit
|
||||
vdup.u8 q1, r3 ; duplicate limit
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
ldr r2, [sp, #8] ; load v ptr
|
||||
vdup.u8 q2, r12 ; duplicate thresh
|
||||
|
||||
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||
vld1.u8 {d6}, [r3], r1 ; p3
|
||||
vld1.u8 {d8}, [r3], r1 ; p2
|
||||
vld1.u8 {d10}, [r3], r1 ; p1
|
||||
vld1.u8 {d12}, [r3], r1 ; p0
|
||||
vld1.u8 {d14}, [r3], r1 ; q0
|
||||
vld1.u8 {d16}, [r3], r1 ; q1
|
||||
vld1.u8 {d18}, [r3], r1 ; q2
|
||||
vld1.u8 {d20}, [r3] ; q3
|
||||
|
||||
ldr r3, [sp, #4] ; load thresh pointer
|
||||
|
||||
sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines
|
||||
vld1.u8 {d7}, [r12], r1 ; p3
|
||||
vld1.u8 {d9}, [r12], r1 ; p2
|
||||
vld1.u8 {d11}, [r12], r1 ; p1
|
||||
vld1.u8 {d13}, [r12], r1 ; p0
|
||||
vld1.u8 {d15}, [r12], r1 ; q0
|
||||
vld1.u8 {d17}, [r12], r1 ; q1
|
||||
vld1.u8 {d19}, [r12], r1 ; q2
|
||||
vld1.u8 {d21}, [r12] ; q3
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r3] ; thresh
|
||||
vld1.u8 {d6}, [r3@64], r1 ; p3
|
||||
vld1.u8 {d7}, [r12@64], r1 ; p3
|
||||
vld1.u8 {d8}, [r3@64], r1 ; p2
|
||||
vld1.u8 {d9}, [r12@64], r1 ; p2
|
||||
vld1.u8 {d10}, [r3@64], r1 ; p1
|
||||
vld1.u8 {d11}, [r12@64], r1 ; p1
|
||||
vld1.u8 {d12}, [r3@64], r1 ; p0
|
||||
vld1.u8 {d13}, [r12@64], r1 ; p0
|
||||
vld1.u8 {d14}, [r3@64], r1 ; q0
|
||||
vld1.u8 {d15}, [r12@64], r1 ; q0
|
||||
vld1.u8 {d16}, [r3@64], r1 ; q1
|
||||
vld1.u8 {d17}, [r12@64], r1 ; q1
|
||||
vld1.u8 {d18}, [r3@64], r1 ; q2
|
||||
vld1.u8 {d19}, [r12@64], r1 ; q2
|
||||
vld1.u8 {d20}, [r3@64] ; q3
|
||||
vld1.u8 {d21}, [r12@64] ; q3
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #1
|
||||
sub r2, r2, r1, lsl #1
|
||||
|
||||
vst1.u8 {d10}, [r0], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r2], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r2], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r2], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0] ; store u oq1
|
||||
vst1.u8 {d17}, [r2] ; store v oq1
|
||||
vst1.u8 {d10}, [r0@64], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r2@64], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0@64], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r2@64], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0@64], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r2@64], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0@64] ; store u oq1
|
||||
vst1.u8 {d17}, [r2@64] ; store v oq1
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
|
||||
|
||||
; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||
@ -124,39 +112,38 @@
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
|
||||
; r0 unsigned char *src
|
||||
; r1 int pitch
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
|
||||
|vp8_loop_filter_vertical_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
sub r2, r0, #4 ; src ptr down by 4 columns
|
||||
sub r0, r0, #2 ; dst ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
push {lr}
|
||||
vdup.u8 q0, r2 ; duplicate blimit
|
||||
vdup.u8 q1, r3 ; duplicate limit
|
||||
sub r2, r0, #4 ; src ptr down by 4 columns
|
||||
add r1, r1, r1
|
||||
ldr r3, [sp, #4] ; load thresh
|
||||
add r12, r2, r1, asr #1
|
||||
|
||||
vld1.u8 {d6}, [r2], r1 ; load first 8-line src data
|
||||
vld1.u8 {d8}, [r2], r1
|
||||
vld1.u8 {d6}, [r2], r1
|
||||
vld1.u8 {d8}, [r12], r1
|
||||
vld1.u8 {d10}, [r2], r1
|
||||
vld1.u8 {d12}, [r2], r1
|
||||
vld1.u8 {d12}, [r12], r1
|
||||
vld1.u8 {d14}, [r2], r1
|
||||
vld1.u8 {d16}, [r2], r1
|
||||
vld1.u8 {d16}, [r12], r1
|
||||
vld1.u8 {d18}, [r2], r1
|
||||
vld1.u8 {d20}, [r2], r1
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.u8 {d20}, [r12], r1
|
||||
|
||||
vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
|
||||
vld1.u8 {d9}, [r2], r1
|
||||
vld1.u8 {d9}, [r12], r1
|
||||
vld1.u8 {d11}, [r2], r1
|
||||
vld1.u8 {d13}, [r2], r1
|
||||
vld1.u8 {d13}, [r12], r1
|
||||
vld1.u8 {d15}, [r2], r1
|
||||
vld1.u8 {d17}, [r2], r1
|
||||
vld1.u8 {d19}, [r2], r1
|
||||
vld1.u8 {d21}, [r2]
|
||||
vld1.u8 {d17}, [r12], r1
|
||||
vld1.u8 {d19}, [r2]
|
||||
vld1.u8 {d21}, [r12]
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
@ -164,6 +151,8 @@
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vdup.u8 q2, r3 ; duplicate thresh
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
@ -178,28 +167,34 @@
|
||||
|
||||
vswp d12, d11
|
||||
vswp d16, d13
|
||||
|
||||
sub r0, r0, #2 ; dst ptr
|
||||
|
||||
vswp d14, d12
|
||||
vswp d16, d15
|
||||
|
||||
add r12, r0, r1, asr #1
|
||||
|
||||
;store op1, op0, oq0, oq1
|
||||
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
||||
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r12], r1
|
||||
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
||||
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
|
||||
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
||||
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r12], r1
|
||||
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
|
||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
|
||||
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
|
||||
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
|
||||
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
|
||||
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
|
||||
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
|
||||
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
|
||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0]
|
||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r12], r1
|
||||
|
||||
ldmia sp!, {pc}
|
||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
|
||||
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r12], r1
|
||||
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
|
||||
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
|
||||
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
|
||||
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r12], r1
|
||||
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0]
|
||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12]
|
||||
|
||||
pop {pc}
|
||||
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
|
||||
|
||||
; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
|
||||
@ -209,38 +204,36 @@
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_loop_filter_vertical_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r12, r0, #4 ; move u pointer down by 4 columns
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
|
||||
push {lr}
|
||||
vdup.u8 q0, r2 ; duplicate blimit
|
||||
sub r12, r0, #4 ; move u pointer down by 4 columns
|
||||
ldr r2, [sp, #8] ; load v ptr
|
||||
|
||||
vld1.u8 {d6}, [r12], r1 ;load u data
|
||||
vld1.u8 {d8}, [r12], r1
|
||||
vld1.u8 {d10}, [r12], r1
|
||||
vld1.u8 {d12}, [r12], r1
|
||||
vld1.u8 {d14}, [r12], r1
|
||||
vld1.u8 {d16}, [r12], r1
|
||||
vld1.u8 {d18}, [r12], r1
|
||||
vld1.u8 {d20}, [r12]
|
||||
|
||||
vdup.u8 q1, r3 ; duplicate limit
|
||||
sub r3, r2, #4 ; move v pointer down by 4 columns
|
||||
|
||||
vld1.u8 {d6}, [r12], r1 ;load u data
|
||||
vld1.u8 {d7}, [r3], r1 ;load v data
|
||||
vld1.u8 {d8}, [r12], r1
|
||||
vld1.u8 {d9}, [r3], r1
|
||||
vld1.u8 {d10}, [r12], r1
|
||||
vld1.u8 {d11}, [r3], r1
|
||||
vld1.u8 {d12}, [r12], r1
|
||||
vld1.u8 {d13}, [r3], r1
|
||||
vld1.u8 {d14}, [r12], r1
|
||||
vld1.u8 {d15}, [r3], r1
|
||||
vld1.u8 {d16}, [r12], r1
|
||||
vld1.u8 {d17}, [r3], r1
|
||||
vld1.u8 {d18}, [r12], r1
|
||||
vld1.u8 {d19}, [r3], r1
|
||||
vld1.u8 {d20}, [r12]
|
||||
vld1.u8 {d21}, [r3]
|
||||
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
@ -248,6 +241,8 @@
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vdup.u8 q2, r12 ; duplicate thresh
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
@ -258,18 +253,16 @@
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
sub r0, r0, #2
|
||||
sub r2, r2, #2
|
||||
|
||||
vswp d12, d11
|
||||
vswp d16, d13
|
||||
vswp d14, d12
|
||||
vswp d16, d15
|
||||
|
||||
sub r0, r0, #2
|
||||
sub r2, r2, #2
|
||||
|
||||
;store op1, op0, oq0, oq1
|
||||
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
|
||||
@ -288,7 +281,7 @@
|
||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
|
||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
|
||||
|
||||
; void vp8_loop_filter_neon();
|
||||
@ -316,42 +309,44 @@
|
||||
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
||||
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
||||
vabd.u8 q4, q10, q9 ; abs(q3 - q2)
|
||||
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
|
||||
|
||||
vmax.u8 q11, q11, q12
|
||||
vmax.u8 q12, q13, q14
|
||||
vmax.u8 q3, q3, q4
|
||||
vmax.u8 q15, q11, q12
|
||||
|
||||
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
|
||||
|
||||
; vp8_hevmask
|
||||
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
|
||||
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
|
||||
vmax.u8 q15, q15, q3
|
||||
|
||||
vadd.u8 q0, q0, q0 ; flimit * 2
|
||||
vadd.u8 q0, q0, q1 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15
|
||||
vmov.u8 q10, #0x80 ; 0x80
|
||||
|
||||
vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
|
||||
vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
|
||||
vshr.u8 q2, q2, #1 ; a = a / 2
|
||||
vqadd.u8 q9, q9, q2 ; a = b + a
|
||||
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
|
||||
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vcge.u8 q15, q1, q15
|
||||
|
||||
; vp8_filter() function
|
||||
; convert to signed
|
||||
veor q7, q7, q0 ; qs0
|
||||
veor q6, q6, q0 ; ps0
|
||||
veor q5, q5, q0 ; ps1
|
||||
veor q8, q8, q0 ; qs1
|
||||
veor q7, q7, q10 ; qs0
|
||||
vshr.u8 q2, q2, #1 ; a = a / 2
|
||||
veor q6, q6, q10 ; ps0
|
||||
|
||||
veor q5, q5, q10 ; ps1
|
||||
vqadd.u8 q9, q9, q2 ; a = b + a
|
||||
|
||||
veor q8, q8, q10 ; qs1
|
||||
|
||||
vmov.u8 q10, #3 ; #3
|
||||
|
||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||
vsubl.s8 q11, d15, d13
|
||||
|
||||
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
|
||||
|
||||
vmovl.u8 q4, d20
|
||||
|
||||
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
||||
@ -378,19 +373,20 @@
|
||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
||||
|
||||
|
||||
vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
|
||||
vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
|
||||
|
||||
; outer tap adjustments: ++vp8_filter >> 1
|
||||
vrshr.s8 q1, q1, #1
|
||||
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
||||
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
|
||||
vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
|
||||
|
||||
veor q5, q13, q0 ; *op1 = u^0x80
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
veor q5, q13, q0 ; *op1 = u^0x80
|
||||
veor q8, q12, q0 ; *oq1 = u^0x80
|
||||
|
||||
bx lr
|
||||
|
@ -9,99 +9,109 @@
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
EXPORT |vp8_loop_filter_bhs_neon|
|
||||
EXPORT |vp8_loop_filter_mbhs_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
|
||||
;are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
; r0 unsigned char *s,
|
||||
; r1 int p, //pitch
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; stack(r4) const signed char *thresh (unused)
|
||||
; //stack(r5) int count --unused
|
||||
|
||||
; r0 unsigned char *s, PRESERVE
|
||||
; r1 int p, PRESERVE
|
||||
; q1 limit, PRESERVE
|
||||
|
||||
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
|
||||
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
|
||||
|
||||
vld1.u8 {q5}, [r0], r1 ; p1
|
||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
||||
vld1.u8 {q6}, [r0], r1 ; p0
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vld1.u8 {q7}, [r0], r1 ; q0
|
||||
vmov.u8 q10, #0x03 ; 0x03
|
||||
vld1.u8 {q8}, [r0] ; q1
|
||||
sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines
|
||||
|
||||
vld1.u8 {q7}, [r0@128], r1 ; q0
|
||||
vld1.u8 {q5}, [r3@128], r1 ; p0
|
||||
vld1.u8 {q8}, [r0@128] ; q1
|
||||
vld1.u8 {q6}, [r3@128] ; p1
|
||||
|
||||
;vp8_filter_mask() function
|
||||
vabd.u8 q15, q6, q7 ; abs(p0 - q0)
|
||||
vabd.u8 q14, q5, q8 ; abs(p1 - q1)
|
||||
|
||||
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
||||
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vmov.s16 q13, #3
|
||||
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||
|
||||
;vp8_filter() function
|
||||
veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
|
||||
veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
|
||||
veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
|
||||
veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
|
||||
|
||||
vadd.u8 q1, q1, q1 ; flimit * 2
|
||||
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
|
||||
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1
|
||||
|
||||
;;;;;;;;;;
|
||||
;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
|
||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||
vsubl.s8 q3, d15, d13
|
||||
|
||||
vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||
|
||||
;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q12, q3, q3
|
||||
vmul.s16 q2, q2, q13 ; 3 * ( qs0 - ps0)
|
||||
vmul.s16 q3, q3, q13
|
||||
|
||||
vmov.u8 q10, #0x03 ; 0x03
|
||||
vmov.u8 q9, #0x04 ; 0x04
|
||||
|
||||
vadd.s16 q2, q2, q11
|
||||
vadd.s16 q3, q3, q12
|
||||
|
||||
vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q3, q3, d9
|
||||
|
||||
;vqadd.s8 q4, q4, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d9, q3
|
||||
;;;;;;;;;;;;;
|
||||
|
||||
vand q4, q4, q15 ; vp8_filter &= mask
|
||||
vand q14, q4, q15 ; vp8_filter &= mask
|
||||
|
||||
vqadd.s8 q2, q4, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q4, q4, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vqadd.s8 q2, q14, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q3, q14, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q4, q4, #3 ; Filter1 >>= 3
|
||||
vshr.s8 q4, q3, #3 ; Filter1 >>= 3
|
||||
|
||||
sub r0, r0, r1, lsl #1
|
||||
sub r0, r0, r1
|
||||
|
||||
;calculate output
|
||||
vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
||||
vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||
|
||||
add r3, r0, r1
|
||||
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
|
||||
vst1.u8 {q6}, [r0] ; store op0
|
||||
vst1.u8 {q7}, [r3] ; store oq0
|
||||
vst1.u8 {q6}, [r3@128] ; store op0
|
||||
vst1.u8 {q7}, [r0@128] ; store oq0
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
|
||||
;-----------------
|
||||
; r0 unsigned char *y
|
||||
; r1 int ystride
|
||||
; r2 const unsigned char *blimit
|
||||
|
||||
|vp8_loop_filter_bhs_neon| PROC
|
||||
push {r4, lr}
|
||||
ldrb r3, [r2] ; load blim from mem
|
||||
vdup.s8 q1, r3 ; duplicate blim
|
||||
|
||||
add r0, r0, r1, lsl #2 ; src = y_ptr + 4 * y_stride
|
||||
bl vp8_loop_filter_simple_horizontal_edge_neon
|
||||
; vp8_loop_filter_simple_horizontal_edge_neon preserves r0, r1 and q1
|
||||
add r0, r0, r1, lsl #2 ; src = y_ptr + 8* y_stride
|
||||
bl vp8_loop_filter_simple_horizontal_edge_neon
|
||||
add r0, r0, r1, lsl #2 ; src = y_ptr + 12 * y_stride
|
||||
pop {r4, lr}
|
||||
b vp8_loop_filter_simple_horizontal_edge_neon
|
||||
ENDP ;|vp8_loop_filter_bhs_neon|
|
||||
|
||||
; r0 unsigned char *y
|
||||
; r1 int ystride
|
||||
; r2 const unsigned char *blimit
|
||||
|
||||
|vp8_loop_filter_mbhs_neon| PROC
|
||||
ldrb r3, [r2] ; load blim from mem
|
||||
vdup.s8 q1, r3 ; duplicate mblim
|
||||
b vp8_loop_filter_simple_horizontal_edge_neon
|
||||
ENDP ;|vp8_loop_filter_bhs_neon|
|
||||
|
||||
END
|
||||
|
@ -9,59 +9,54 @@
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
;EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
EXPORT |vp8_loop_filter_bvs_neon|
|
||||
EXPORT |vp8_loop_filter_mbvs_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
|
||||
;are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
; r0 unsigned char *s,
|
||||
; r1 int p, //pitch
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; stack(r4) const signed char *thresh (unused)
|
||||
; //stack(r5) int count --unused
|
||||
|
||||
; r0 unsigned char *s, PRESERVE
|
||||
; r1 int p, PRESERVE
|
||||
; q1 limit, PRESERVE
|
||||
|
||||
|vp8_loop_filter_simple_vertical_edge_neon| PROC
|
||||
sub r0, r0, #2 ; move src pointer down by 2 columns
|
||||
add r12, r1, r1
|
||||
add r3, r0, r1
|
||||
|
||||
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
|
||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
||||
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
|
||||
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
|
||||
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
|
||||
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
|
||||
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
|
||||
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
|
||||
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
|
||||
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r12
|
||||
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r3], r12
|
||||
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r12
|
||||
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r3], r12
|
||||
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r12
|
||||
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r3], r12
|
||||
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r12
|
||||
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r3], r12
|
||||
|
||||
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
||||
vmov.u8 q11, #0x03 ; 0x03
|
||||
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||
vmov.u8 q12, #0x04 ; 0x04
|
||||
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
||||
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
||||
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
||||
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
|
||||
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r12
|
||||
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r3], r12
|
||||
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r12
|
||||
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r3], r12
|
||||
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r12
|
||||
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r3], r12
|
||||
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r12
|
||||
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r3]
|
||||
|
||||
vswp d7, d10
|
||||
vswp d12, d9
|
||||
;vswp q4, q5 ; p1:q3, p0:q5, q0:q4, q1:q6
|
||||
|
||||
;vp8_filter_mask() function
|
||||
;vp8_hevmask() function
|
||||
sub r0, r0, r1, lsl #4
|
||||
vabd.u8 q15, q5, q4 ; abs(p0 - q0)
|
||||
vabd.u8 q14, q3, q6 ; abs(p1 - q1)
|
||||
|
||||
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
||||
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vmov.s16 q11, #3
|
||||
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||
|
||||
veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
|
||||
@ -69,80 +64,91 @@
|
||||
veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
|
||||
veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
|
||||
|
||||
vadd.u8 q1, q1, q1 ; flimit * 2
|
||||
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15 ; abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
|
||||
|
||||
;vp8_filter() function
|
||||
;;;;;;;;;;
|
||||
;vqsub.s8 q2, q5, q4 ; ( qs0 - ps0)
|
||||
vsubl.s8 q2, d8, d10 ; ( qs0 - ps0)
|
||||
vsubl.s8 q13, d9, d11
|
||||
|
||||
vqsub.s8 q1, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||
vqsub.s8 q14, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||
|
||||
;vmul.i8 q2, q2, q11 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q14, q13, q13
|
||||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q13, q13, q14
|
||||
vmul.s16 q2, q2, q11 ; 3 * ( qs0 - ps0)
|
||||
vmul.s16 q13, q13, q11
|
||||
|
||||
;vqadd.s8 q1, q1, q2
|
||||
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q13, q13, d3
|
||||
vmov.u8 q11, #0x03 ; 0x03
|
||||
vmov.u8 q12, #0x04 ; 0x04
|
||||
|
||||
vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d3, q13
|
||||
vaddw.s8 q2, q2, d28 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q13, q13, d29
|
||||
|
||||
vqmovn.s16 d28, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d29, q13
|
||||
|
||||
add r0, r0, #1
|
||||
add r2, r0, r1
|
||||
;;;;;;;;;;;
|
||||
add r3, r0, r1
|
||||
|
||||
vand q1, q1, q15 ; vp8_filter &= mask
|
||||
vand q14, q14, q15 ; vp8_filter &= mask
|
||||
|
||||
vqadd.s8 q2, q1, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q1, q1, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vqadd.s8 q2, q14, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q3, q14, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
||||
vshr.s8 q14, q3, #3 ; Filter1 >>= 3
|
||||
|
||||
;calculate output
|
||||
vqsub.s8 q10, q4, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||
vqadd.s8 q11, q5, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
||||
vqsub.s8 q10, q4, q14 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
|
||||
add r3, r2, r1
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
add r12, r1, r1
|
||||
vswp d13, d14
|
||||
add r12, r3, r1
|
||||
|
||||
;store op1, op0, oq0, oq1
|
||||
vst2.8 {d12[0], d13[0]}, [r0]
|
||||
vst2.8 {d12[1], d13[1]}, [r2]
|
||||
vst2.8 {d12[2], d13[2]}, [r3]
|
||||
vst2.8 {d12[3], d13[3]}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst2.8 {d12[4], d13[4]}, [r12]
|
||||
vst2.8 {d12[5], d13[5]}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst2.8 {d12[6], d13[6]}, [r0]
|
||||
vst2.8 {d12[7], d13[7]}, [r2], r1
|
||||
add r3, r2, r1
|
||||
vst2.8 {d14[0], d15[0]}, [r2]
|
||||
vst2.8 {d14[1], d15[1]}, [r3], r1
|
||||
add r12, r3, r1
|
||||
vst2.8 {d14[2], d15[2]}, [r3]
|
||||
vst2.8 {d14[3], d15[3]}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst2.8 {d14[4], d15[4]}, [r12]
|
||||
vst2.8 {d14[5], d15[5]}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst2.8 {d14[6], d15[6]}, [r0]
|
||||
vst2.8 {d14[7], d15[7]}, [r2]
|
||||
vst2.8 {d12[0], d13[0]}, [r0], r12
|
||||
vst2.8 {d12[1], d13[1]}, [r3], r12
|
||||
vst2.8 {d12[2], d13[2]}, [r0], r12
|
||||
vst2.8 {d12[3], d13[3]}, [r3], r12
|
||||
vst2.8 {d12[4], d13[4]}, [r0], r12
|
||||
vst2.8 {d12[5], d13[5]}, [r3], r12
|
||||
vst2.8 {d12[6], d13[6]}, [r0], r12
|
||||
vst2.8 {d12[7], d13[7]}, [r3], r12
|
||||
vst2.8 {d14[0], d15[0]}, [r0], r12
|
||||
vst2.8 {d14[1], d15[1]}, [r3], r12
|
||||
vst2.8 {d14[2], d15[2]}, [r0], r12
|
||||
vst2.8 {d14[3], d15[3]}, [r3], r12
|
||||
vst2.8 {d14[4], d15[4]}, [r0], r12
|
||||
vst2.8 {d14[5], d15[5]}, [r3], r12
|
||||
vst2.8 {d14[6], d15[6]}, [r0], r12
|
||||
vst2.8 {d14[7], d15[7]}, [r3]
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
|
||||
;-----------------
|
||||
; r0 unsigned char *y
|
||||
; r1 int ystride
|
||||
; r2 const unsigned char *blimit
|
||||
|
||||
|vp8_loop_filter_bvs_neon| PROC
|
||||
push {r4, lr}
|
||||
ldrb r3, [r2] ; load blim from mem
|
||||
mov r4, r0
|
||||
add r0, r0, #4
|
||||
vdup.s8 q1, r3 ; duplicate blim
|
||||
bl vp8_loop_filter_simple_vertical_edge_neon
|
||||
; vp8_loop_filter_simple_vertical_edge_neon preserves r1 and q1
|
||||
add r0, r4, #8
|
||||
bl vp8_loop_filter_simple_vertical_edge_neon
|
||||
add r0, r4, #12
|
||||
pop {r4, lr}
|
||||
b vp8_loop_filter_simple_vertical_edge_neon
|
||||
ENDP ;|vp8_loop_filter_bvs_neon|
|
||||
|
||||
; r0 unsigned char *y
|
||||
; r1 int ystride
|
||||
; r2 const unsigned char *blimit
|
||||
|
||||
|vp8_loop_filter_mbvs_neon| PROC
|
||||
ldrb r3, [r2] ; load mblim from mem
|
||||
vdup.s8 q1, r3 ; duplicate mblim
|
||||
b vp8_loop_filter_simple_vertical_edge_neon
|
||||
ENDP ;|vp8_loop_filter_bvs_neon|
|
||||
END
|
||||
|
@ -14,155 +14,143 @@
|
||||
EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||
EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; flimit, limit, and thresh should be positive numbers.
|
||||
; All 16 elements in these variables are equal.
|
||||
|
||||
; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; const unsigned char *blimit,
|
||||
; const unsigned char *limit,
|
||||
; const unsigned char *thresh)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
push {lr}
|
||||
add r1, r1, r1 ; double stride
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
||||
vdup.u8 q2, r12 ; thresh
|
||||
add r12, r0, r1, lsr #1 ; move src pointer up by 1 line
|
||||
|
||||
vld1.u8 {q3}, [r0], r1 ; p3
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
vld1.u8 {q4}, [r0], r1 ; p2
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.u8 {q5}, [r0], r1 ; p1
|
||||
vld1.u8 {q6}, [r0], r1 ; p0
|
||||
vld1.u8 {q7}, [r0], r1 ; q0
|
||||
vld1.u8 {q8}, [r0], r1 ; q1
|
||||
vld1.u8 {q9}, [r0], r1 ; q2
|
||||
vld1.u8 {q10}, [r0], r1 ; q3
|
||||
vld1.u8 {q3}, [r0@128], r1 ; p3
|
||||
vld1.u8 {q4}, [r12@128], r1 ; p2
|
||||
vld1.u8 {q5}, [r0@128], r1 ; p1
|
||||
vld1.u8 {q6}, [r12@128], r1 ; p0
|
||||
vld1.u8 {q7}, [r0@128], r1 ; q0
|
||||
vld1.u8 {q8}, [r12@128], r1 ; q1
|
||||
vld1.u8 {q9}, [r0@128], r1 ; q2
|
||||
vld1.u8 {q10}, [r12@128], r1 ; q3
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #3
|
||||
add r0, r0, r1
|
||||
add r2, r0, r1
|
||||
add r3, r2, r1
|
||||
sub r12, r12, r1, lsl #2
|
||||
add r0, r12, r1, lsr #1
|
||||
|
||||
vst1.u8 {q4}, [r0] ; store op2
|
||||
vst1.u8 {q5}, [r2] ; store op1
|
||||
vst1.u8 {q6}, [r3], r1 ; store op0
|
||||
add r12, r3, r1
|
||||
vst1.u8 {q7}, [r3] ; store oq0
|
||||
vst1.u8 {q8}, [r12], r1 ; store oq1
|
||||
vst1.u8 {q9}, [r12] ; store oq2
|
||||
vst1.u8 {q4}, [r12@128],r1 ; store op2
|
||||
vst1.u8 {q5}, [r0@128],r1 ; store op1
|
||||
vst1.u8 {q6}, [r12@128], r1 ; store op0
|
||||
vst1.u8 {q7}, [r0@128],r1 ; store oq0
|
||||
vst1.u8 {q8}, [r12@128] ; store oq1
|
||||
vst1.u8 {q9}, [r0@128] ; store oq2
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
|
||||
|
||||
; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; const unsigned char *blimit,
|
||||
; const unsigned char *limit,
|
||||
; const unsigned char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|
||||
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
ldr r3, [sp, #8] ; load v ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
|
||||
push {lr}
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||
vdup.u8 q2, r12 ; thresh
|
||||
ldr r12, [sp, #8] ; load v ptr
|
||||
sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ; p3
|
||||
vld1.u8 {d7}, [r3], r1 ; p3
|
||||
vld1.u8 {d8}, [r0], r1 ; p2
|
||||
vld1.u8 {d9}, [r3], r1 ; p2
|
||||
vld1.u8 {d10}, [r0], r1 ; p1
|
||||
vld1.u8 {d11}, [r3], r1 ; p1
|
||||
vld1.u8 {d12}, [r0], r1 ; p0
|
||||
vld1.u8 {d13}, [r3], r1 ; p0
|
||||
vld1.u8 {d14}, [r0], r1 ; q0
|
||||
vld1.u8 {d15}, [r3], r1 ; q0
|
||||
vld1.u8 {d16}, [r0], r1 ; q1
|
||||
vld1.u8 {d17}, [r3], r1 ; q1
|
||||
vld1.u8 {d18}, [r0], r1 ; q2
|
||||
vld1.u8 {d19}, [r3], r1 ; q2
|
||||
vld1.u8 {d20}, [r0], r1 ; q3
|
||||
vld1.u8 {d21}, [r3], r1 ; q3
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.u8 {d6}, [r0@64], r1 ; p3
|
||||
vld1.u8 {d7}, [r12@64], r1 ; p3
|
||||
vld1.u8 {d8}, [r0@64], r1 ; p2
|
||||
vld1.u8 {d9}, [r12@64], r1 ; p2
|
||||
vld1.u8 {d10}, [r0@64], r1 ; p1
|
||||
vld1.u8 {d11}, [r12@64], r1 ; p1
|
||||
vld1.u8 {d12}, [r0@64], r1 ; p0
|
||||
vld1.u8 {d13}, [r12@64], r1 ; p0
|
||||
vld1.u8 {d14}, [r0@64], r1 ; q0
|
||||
vld1.u8 {d15}, [r12@64], r1 ; q0
|
||||
vld1.u8 {d16}, [r0@64], r1 ; q1
|
||||
vld1.u8 {d17}, [r12@64], r1 ; q1
|
||||
vld1.u8 {d18}, [r0@64], r1 ; q2
|
||||
vld1.u8 {d19}, [r12@64], r1 ; q2
|
||||
vld1.u8 {d20}, [r0@64], r1 ; q3
|
||||
vld1.u8 {d21}, [r12@64], r1 ; q3
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #3
|
||||
sub r3, r3, r1, lsl #3
|
||||
sub r12, r12, r1, lsl #3
|
||||
|
||||
add r0, r0, r1
|
||||
add r3, r3, r1
|
||||
add r12, r12, r1
|
||||
|
||||
vst1.u8 {d8}, [r0], r1 ; store u op2
|
||||
vst1.u8 {d9}, [r3], r1 ; store v op2
|
||||
vst1.u8 {d10}, [r0], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r3], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r3], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r3], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0], r1 ; store u oq1
|
||||
vst1.u8 {d17}, [r3], r1 ; store v oq1
|
||||
vst1.u8 {d18}, [r0], r1 ; store u oq2
|
||||
vst1.u8 {d19}, [r3], r1 ; store v oq2
|
||||
vst1.u8 {d8}, [r0@64], r1 ; store u op2
|
||||
vst1.u8 {d9}, [r12@64], r1 ; store v op2
|
||||
vst1.u8 {d10}, [r0@64], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r12@64], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0@64], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r12@64], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0@64], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r12@64], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0@64], r1 ; store u oq1
|
||||
vst1.u8 {d17}, [r12@64], r1 ; store v oq1
|
||||
vst1.u8 {d18}, [r0@64], r1 ; store u oq2
|
||||
vst1.u8 {d19}, [r12@64], r1 ; store v oq2
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
|
||||
|
||||
; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; const unsigned char *blimit,
|
||||
; const unsigned char *limit,
|
||||
; const unsigned char *thresh)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
push {lr}
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
sub r0, r0, #4 ; move src pointer down by 4 columns
|
||||
vdup.s8 q2, r12 ; thresh
|
||||
add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
vld1.u8 {d7}, [r12], r1 ; load second 8-line src data
|
||||
vld1.u8 {d8}, [r0], r1
|
||||
sub sp, sp, #32
|
||||
vld1.u8 {d9}, [r12], r1
|
||||
vld1.u8 {d10}, [r0], r1
|
||||
vld1.u8 {d11}, [r12], r1
|
||||
vld1.u8 {d12}, [r0], r1
|
||||
vld1.u8 {d13}, [r12], r1
|
||||
vld1.u8 {d14}, [r0], r1
|
||||
vld1.u8 {d15}, [r12], r1
|
||||
vld1.u8 {d16}, [r0], r1
|
||||
vld1.u8 {d17}, [r12], r1
|
||||
vld1.u8 {d18}, [r0], r1
|
||||
vld1.u8 {d19}, [r12], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
|
||||
vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
|
||||
vld1.u8 {d9}, [r0], r1
|
||||
vld1.u8 {d11}, [r0], r1
|
||||
vld1.u8 {d13}, [r0], r1
|
||||
vld1.u8 {d15}, [r0], r1
|
||||
vld1.u8 {d17}, [r0], r1
|
||||
vld1.u8 {d19}, [r0], r1
|
||||
vld1.u8 {d21}, [r0], r1
|
||||
vld1.u8 {d21}, [r12], r1
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
@ -180,133 +168,11 @@
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
mov r12, sp
|
||||
vst1.u8 {q3}, [r12]!
|
||||
vst1.u8 {q10}, [r12]!
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #4
|
||||
|
||||
add r2, r0, r1
|
||||
|
||||
add r3, r2, r1
|
||||
|
||||
vld1.u8 {q3}, [sp]!
|
||||
vld1.u8 {q10}, [sp]!
|
||||
|
||||
;transpose to 16x8 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
add r12, r3, r1
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
;store op2, op1, op0, oq0, oq1, oq2
|
||||
vst1.8 {d6}, [r0]
|
||||
vst1.8 {d8}, [r2]
|
||||
vst1.8 {d10}, [r3]
|
||||
vst1.8 {d12}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst1.8 {d14}, [r12]
|
||||
vst1.8 {d16}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst1.8 {d18}, [r0]
|
||||
vst1.8 {d20}, [r2], r1
|
||||
add r3, r2, r1
|
||||
vst1.8 {d7}, [r2]
|
||||
vst1.8 {d9}, [r3], r1
|
||||
add r12, r3, r1
|
||||
vst1.8 {d11}, [r3]
|
||||
vst1.8 {d13}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst1.8 {d15}, [r12]
|
||||
vst1.8 {d17}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst1.8 {d19}, [r0]
|
||||
vst1.8 {d21}, [r2]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||
|
||||
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, #4 ; move src pointer down by 4 columns
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
ldr r3, [sp, #8] ; load v ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
|
||||
sub r3, r3, #4 ; move v pointer down by 4 columns
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ;load u data
|
||||
vld1.u8 {d7}, [r3], r1 ;load v data
|
||||
vld1.u8 {d8}, [r0], r1
|
||||
vld1.u8 {d9}, [r3], r1
|
||||
vld1.u8 {d10}, [r0], r1
|
||||
vld1.u8 {d11}, [r3], r1
|
||||
vld1.u8 {d12}, [r0], r1
|
||||
vld1.u8 {d13}, [r3], r1
|
||||
vld1.u8 {d14}, [r0], r1
|
||||
vld1.u8 {d15}, [r3], r1
|
||||
vld1.u8 {d16}, [r0], r1
|
||||
vld1.u8 {d17}, [r3], r1
|
||||
vld1.u8 {d18}, [r0], r1
|
||||
vld1.u8 {d19}, [r3], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
vld1.u8 {d21}, [r3], r1
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
sub sp, sp, #32
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
mov r12, sp
|
||||
vst1.u8 {q3}, [r12]!
|
||||
vst1.u8 {q10}, [r12]!
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #3
|
||||
sub r3, r3, r1, lsl #3
|
||||
|
||||
vld1.u8 {q3}, [sp]!
|
||||
vld1.u8 {q10}, [sp]!
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r12, r12, r1, lsl #3
|
||||
|
||||
;transpose to 16x8 matrix
|
||||
vtrn.32 q3, q7
|
||||
@ -326,23 +192,118 @@
|
||||
|
||||
;store op2, op1, op0, oq0, oq1, oq2
|
||||
vst1.8 {d6}, [r0], r1
|
||||
vst1.8 {d7}, [r3], r1
|
||||
vst1.8 {d7}, [r12], r1
|
||||
vst1.8 {d8}, [r0], r1
|
||||
vst1.8 {d9}, [r3], r1
|
||||
vst1.8 {d9}, [r12], r1
|
||||
vst1.8 {d10}, [r0], r1
|
||||
vst1.8 {d11}, [r3], r1
|
||||
vst1.8 {d11}, [r12], r1
|
||||
vst1.8 {d12}, [r0], r1
|
||||
vst1.8 {d13}, [r3], r1
|
||||
vst1.8 {d13}, [r12], r1
|
||||
vst1.8 {d14}, [r0], r1
|
||||
vst1.8 {d15}, [r3], r1
|
||||
vst1.8 {d15}, [r12], r1
|
||||
vst1.8 {d16}, [r0], r1
|
||||
vst1.8 {d17}, [r3], r1
|
||||
vst1.8 {d17}, [r12], r1
|
||||
vst1.8 {d18}, [r0], r1
|
||||
vst1.8 {d19}, [r3], r1
|
||||
vst1.8 {d20}, [r0], r1
|
||||
vst1.8 {d21}, [r3], r1
|
||||
vst1.8 {d19}, [r12], r1
|
||||
vst1.8 {d20}, [r0]
|
||||
vst1.8 {d21}, [r12]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||
|
||||
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const unsigned char *blimit,
|
||||
; const unsigned char *limit,
|
||||
; const unsigned char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
|
||||
push {lr}
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
sub r0, r0, #4 ; move u pointer down by 4 columns
|
||||
vdup.u8 q2, r12 ; thresh
|
||||
ldr r12, [sp, #8] ; load v ptr
|
||||
sub r12, r12, #4 ; move v pointer down by 4 columns
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ;load u data
|
||||
vld1.u8 {d7}, [r12], r1 ;load v data
|
||||
vld1.u8 {d8}, [r0], r1
|
||||
vld1.u8 {d9}, [r12], r1
|
||||
vld1.u8 {d10}, [r0], r1
|
||||
vld1.u8 {d11}, [r12], r1
|
||||
vld1.u8 {d12}, [r0], r1
|
||||
vld1.u8 {d13}, [r12], r1
|
||||
vld1.u8 {d14}, [r0], r1
|
||||
vld1.u8 {d15}, [r12], r1
|
||||
vld1.u8 {d16}, [r0], r1
|
||||
vld1.u8 {d17}, [r12], r1
|
||||
vld1.u8 {d18}, [r0], r1
|
||||
vld1.u8 {d19}, [r12], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
vld1.u8 {d21}, [r12], r1
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
sub r0, r0, r1, lsl #3
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r12, r12, r1, lsl #3
|
||||
|
||||
;transpose to 16x8 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
;store op2, op1, op0, oq0, oq1, oq2
|
||||
vst1.8 {d6}, [r0], r1
|
||||
vst1.8 {d7}, [r12], r1
|
||||
vst1.8 {d8}, [r0], r1
|
||||
vst1.8 {d9}, [r12], r1
|
||||
vst1.8 {d10}, [r0], r1
|
||||
vst1.8 {d11}, [r12], r1
|
||||
vst1.8 {d12}, [r0], r1
|
||||
vst1.8 {d13}, [r12], r1
|
||||
vst1.8 {d14}, [r0], r1
|
||||
vst1.8 {d15}, [r12], r1
|
||||
vst1.8 {d16}, [r0], r1
|
||||
vst1.8 {d17}, [r12], r1
|
||||
vst1.8 {d18}, [r0], r1
|
||||
vst1.8 {d19}, [r12], r1
|
||||
vst1.8 {d20}, [r0]
|
||||
vst1.8 {d21}, [r12]
|
||||
|
||||
pop {pc}
|
||||
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
|
||||
|
||||
; void vp8_mbloop_filter_neon()
|
||||
@ -350,26 +311,19 @@
|
||||
; functions do the necessary load, transpose (if necessary), preserve (if
|
||||
; necessary) and store.
|
||||
|
||||
; TODO:
|
||||
; The vertical filter writes p3/q3 back out because two 4 element writes are
|
||||
; much simpler than ordering and writing two 3 element sets (or three 2 elements
|
||||
; sets, or whichever other combinations are possible).
|
||||
; If we can preserve q3 and q10, the vertical filter will be able to avoid
|
||||
; storing those values on the stack and reading them back after the filter.
|
||||
|
||||
; r0,r1 PRESERVE
|
||||
; r2 flimit
|
||||
; r3 PRESERVE
|
||||
; q1 limit
|
||||
; r2 mblimit
|
||||
; r3 limit
|
||||
|
||||
; q2 thresh
|
||||
; q3 p3
|
||||
; q3 p3 PRESERVE
|
||||
; q4 p2
|
||||
; q5 p1
|
||||
; q6 p0
|
||||
; q7 q0
|
||||
; q8 q1
|
||||
; q9 q2
|
||||
; q10 q3
|
||||
; q10 q3 PRESERVE
|
||||
|
||||
|vp8_mbloop_filter_neon| PROC
|
||||
|
||||
@ -378,12 +332,12 @@
|
||||
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
|
||||
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
|
||||
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
||||
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
||||
vabd.u8 q1, q9, q8 ; abs(q2 - q1)
|
||||
vabd.u8 q0, q10, q9 ; abs(q3 - q2)
|
||||
|
||||
vmax.u8 q11, q11, q12
|
||||
vmax.u8 q12, q13, q14
|
||||
vmax.u8 q3, q3, q0
|
||||
vmax.u8 q1, q1, q0
|
||||
vmax.u8 q15, q11, q12
|
||||
|
||||
vabd.u8 q12, q6, q7 ; abs(p0 - q0)
|
||||
@ -391,44 +345,46 @@
|
||||
; vp8_hevmask
|
||||
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
|
||||
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
|
||||
vmax.u8 q15, q15, q3
|
||||
vmax.u8 q15, q15, q1
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r2] ; flimit
|
||||
vdup.u8 q1, r3 ; limit
|
||||
vdup.u8 q2, r2 ; mblimit
|
||||
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
|
||||
vadd.u8 q2, q2, q2 ; flimit * 2
|
||||
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15
|
||||
|
||||
vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
|
||||
vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
|
||||
vshr.u8 q1, q1, #1 ; a = a / 2
|
||||
vqadd.u8 q12, q12, q1 ; a = b + a
|
||||
vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
|
||||
vmov.u16 q11, #3 ; #3
|
||||
|
||||
; vp8_filter
|
||||
; convert to signed
|
||||
veor q7, q7, q0 ; qs0
|
||||
vshr.u8 q1, q1, #1 ; a = a / 2
|
||||
veor q6, q6, q0 ; ps0
|
||||
veor q5, q5, q0 ; ps1
|
||||
|
||||
vqadd.u8 q12, q12, q1 ; a = b + a
|
||||
|
||||
veor q8, q8, q0 ; qs1
|
||||
veor q4, q4, q0 ; ps2
|
||||
veor q9, q9, q0 ; qs2
|
||||
|
||||
vorr q14, q13, q14 ; vp8_hevmask
|
||||
|
||||
vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
|
||||
|
||||
vsubl.s8 q2, d14, d12 ; qs0 - ps0
|
||||
vsubl.s8 q13, d15, d13
|
||||
|
||||
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
||||
|
||||
vadd.s16 q10, q2, q2 ; 3 * (qs0 - ps0)
|
||||
vadd.s16 q11, q13, q13
|
||||
vmul.i16 q2, q2, q11 ; 3 * ( qs0 - ps0)
|
||||
|
||||
vand q15, q15, q12 ; vp8_filter_mask
|
||||
|
||||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q13, q13, q11
|
||||
vmul.i16 q13, q13, q11
|
||||
|
||||
vmov.u8 q12, #3 ; #3
|
||||
|
||||
@ -447,23 +403,19 @@
|
||||
|
||||
vand q13, q1, q14 ; Filter2 &= hev
|
||||
|
||||
vmov.u8 d7, #9 ; #9
|
||||
|
||||
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
|
||||
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
|
||||
|
||||
vmov.u8 d6, #18 ; #18
|
||||
vmov q0, q15
|
||||
|
||||
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
|
||||
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
|
||||
|
||||
vmov q10, q15
|
||||
vmov q11, q15
|
||||
vmov q12, q15
|
||||
|
||||
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
|
||||
|
||||
vmov.u8 d5, #27 ; #27
|
||||
|
||||
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
|
||||
|
||||
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
||||
@ -471,35 +423,43 @@
|
||||
; roughly 1/7th difference across boundary
|
||||
; roughly 2/7th difference across boundary
|
||||
; roughly 3/7th difference across boundary
|
||||
vmov q11, q15
|
||||
|
||||
vmov.u8 d5, #9 ; #9
|
||||
vmov.u8 d4, #18 ; #18
|
||||
|
||||
vmov q13, q15
|
||||
vmov q14, q15
|
||||
|
||||
vmlal.s8 q10, d2, d7 ; Filter2 * 9
|
||||
vmlal.s8 q11, d3, d7
|
||||
vmlal.s8 q12, d2, d6 ; Filter2 * 18
|
||||
vmlal.s8 q13, d3, d6
|
||||
vmlal.s8 q14, d2, d5 ; Filter2 * 27
|
||||
vmlal.s8 q0, d2, d5 ; 63 + Filter2 * 9
|
||||
vmlal.s8 q11, d3, d5
|
||||
vmov.u8 d5, #27 ; #27
|
||||
vmlal.s8 q12, d2, d4 ; 63 + Filter2 * 18
|
||||
vmlal.s8 q13, d3, d4
|
||||
vmlal.s8 q14, d2, d5 ; 63 + Filter2 * 27
|
||||
vmlal.s8 q15, d3, d5
|
||||
vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7)
|
||||
vqshrn.s16 d21, q11, #7
|
||||
|
||||
vqshrn.s16 d0, q0, #7 ; u = clamp((63 + Filter2 * 9)>>7)
|
||||
vqshrn.s16 d1, q11, #7
|
||||
vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
|
||||
vqshrn.s16 d25, q13, #7
|
||||
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
|
||||
vqshrn.s16 d29, q15, #7
|
||||
|
||||
vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u)
|
||||
vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u)
|
||||
vmov.u8 q1, #0x80 ; 0x80
|
||||
|
||||
vqsub.s8 q11, q9, q0 ; s = clamp(qs2 - u)
|
||||
vqadd.s8 q0, q4, q0 ; s = clamp(ps2 + u)
|
||||
vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
|
||||
vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
|
||||
vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
|
||||
vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
|
||||
veor q9, q11, q0 ; *oq2 = s^0x80
|
||||
veor q4, q10, q0 ; *op2 = s^0x80
|
||||
veor q8, q13, q0 ; *oq1 = s^0x80
|
||||
veor q5, q12, q0 ; *op2 = s^0x80
|
||||
veor q7, q15, q0 ; *oq0 = s^0x80
|
||||
veor q6, q14, q0 ; *op0 = s^0x80
|
||||
|
||||
veor q9, q11, q1 ; *oq2 = s^0x80
|
||||
veor q4, q0, q1 ; *op2 = s^0x80
|
||||
veor q8, q13, q1 ; *oq1 = s^0x80
|
||||
veor q5, q12, q1 ; *op2 = s^0x80
|
||||
veor q7, q15, q1 ; *oq0 = s^0x80
|
||||
veor q6, q14, q1 ; *op0 = s^0x80
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_mbloop_filter_neon|
|
||||
|
@ -108,9 +108,9 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_c;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_c;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
|
||||
|
||||
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_INTERNAL_STATS)
|
||||
|
@ -9,152 +9,149 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_config.h"
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
||||
|
||||
prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
||||
prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
|
||||
void vp8_init_loop_filter(VP8_COMMON *cm)
|
||||
static void lf_init_lut(loop_filter_info_n *lfi)
|
||||
{
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
LOOPFILTERTYPE lft = cm->filter_type;
|
||||
int sharpness_lvl = cm->sharpness_level;
|
||||
int frame_type = cm->frame_type;
|
||||
int i, j;
|
||||
int filt_lvl;
|
||||
|
||||
int block_inside_limit = 0;
|
||||
int HEVThresh;
|
||||
|
||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
|
||||
if (frame_type == KEY_FRAME)
|
||||
if (filt_lvl >= 40)
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
HEVThresh = 2;
|
||||
else if (filt_lvl >= 15)
|
||||
HEVThresh = 1;
|
||||
else
|
||||
HEVThresh = 0;
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
|
||||
}
|
||||
else if (filt_lvl >= 20)
|
||||
{
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
|
||||
}
|
||||
else if (filt_lvl >= 15)
|
||||
{
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
HEVThresh = 3;
|
||||
else if (filt_lvl >= 20)
|
||||
HEVThresh = 2;
|
||||
else if (filt_lvl >= 15)
|
||||
HEVThresh = 1;
|
||||
else
|
||||
HEVThresh = 0;
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
lfi->mode_lf_lut[DC_PRED] = 1;
|
||||
lfi->mode_lf_lut[V_PRED] = 1;
|
||||
lfi->mode_lf_lut[H_PRED] = 1;
|
||||
lfi->mode_lf_lut[TM_PRED] = 1;
|
||||
lfi->mode_lf_lut[B_PRED] = 0;
|
||||
|
||||
lfi->mode_lf_lut[ZEROMV] = 1;
|
||||
lfi->mode_lf_lut[NEARESTMV] = 2;
|
||||
lfi->mode_lf_lut[NEARMV] = 2;
|
||||
lfi->mode_lf_lut[NEWMV] = 2;
|
||||
lfi->mode_lf_lut[SPLITMV] = 3;
|
||||
|
||||
}
|
||||
|
||||
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
int sharpness_lvl)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* For each possible value for the loop filter fill out limits */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
int block_inside_limit = 0;
|
||||
|
||||
/* Set loop filter paramaeters that control sharpness. */
|
||||
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
|
||||
@ -169,119 +166,120 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
||||
if (block_inside_limit < 1)
|
||||
block_inside_limit = 1;
|
||||
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl + 2;
|
||||
lfi[i].flim[j] = filt_lvl;
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Set up the function pointers depending on the type of loop filtering selected */
|
||||
if (lft == NORMAL_LOOPFILTER)
|
||||
{
|
||||
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
|
||||
cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v);
|
||||
cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h);
|
||||
cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h);
|
||||
}
|
||||
else
|
||||
{
|
||||
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v);
|
||||
cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v);
|
||||
cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h);
|
||||
cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h);
|
||||
vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
|
||||
vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
|
||||
SIMD_WIDTH);
|
||||
vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
|
||||
SIMD_WIDTH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
|
||||
* each frame. Check last_frame_type to skip the function most of times.
|
||||
*/
|
||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
|
||||
void vp8_loop_filter_init(VP8_COMMON *cm)
|
||||
{
|
||||
int HEVThresh;
|
||||
int i, j;
|
||||
loop_filter_info_n *lfi = &cm->lf_info;
|
||||
int i;
|
||||
|
||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
/* init limits for given sharpness*/
|
||||
vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
|
||||
/* init LUT for lvl and hev thr picking */
|
||||
lf_init_lut(lfi);
|
||||
|
||||
/* init hev threshold const vectors */
|
||||
for(i = 0; i < 4 ; i++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
|
||||
if (frame_type == KEY_FRAME)
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
HEVThresh = 2;
|
||||
else if (filt_lvl >= 15)
|
||||
HEVThresh = 1;
|
||||
else
|
||||
HEVThresh = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
HEVThresh = 3;
|
||||
else if (filt_lvl >= 20)
|
||||
HEVThresh = 2;
|
||||
else if (filt_lvl >= 15)
|
||||
HEVThresh = 1;
|
||||
else
|
||||
HEVThresh = 0;
|
||||
}
|
||||
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
/*lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl+2;*/
|
||||
/*lfi[i].flim[j] = filt_lvl;*/
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
}
|
||||
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level)
|
||||
void vp8_loop_filter_frame_init(VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl,
|
||||
int sharpness_lvl)
|
||||
{
|
||||
MB_MODE_INFO *mbmi = &mbd->mode_info_context->mbmi;
|
||||
int seg, /* segment number */
|
||||
ref, /* index in ref_lf_deltas */
|
||||
mode; /* index in mode_lf_deltas */
|
||||
|
||||
if (mbd->mode_ref_lf_delta_enabled)
|
||||
loop_filter_info_n *lfi = &cm->lf_info;
|
||||
|
||||
/* update limits if sharpness has changed */
|
||||
if(cm->last_sharpness_level != sharpness_lvl)
|
||||
{
|
||||
vp8_loop_filter_update_sharpness(lfi, sharpness_lvl);
|
||||
cm->last_sharpness_level = sharpness_lvl;
|
||||
}
|
||||
|
||||
for(seg = 0; seg < MAX_MB_SEGMENTS; seg++)
|
||||
{
|
||||
int lvl_seg = default_filt_lvl;
|
||||
int lvl_ref, lvl_mode;
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (mbd->segmentation_enabled)
|
||||
{
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
{
|
||||
lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||
}
|
||||
else /* Delta Value */
|
||||
{
|
||||
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mbd->mode_ref_lf_delta_enabled)
|
||||
{
|
||||
/* we could get rid of this if we assume that deltas are set to
|
||||
* zero when not in use; encoder always uses deltas
|
||||
*/
|
||||
vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
|
||||
continue;
|
||||
}
|
||||
|
||||
lvl_ref = lvl_seg;
|
||||
|
||||
/* INTRA_FRAME */
|
||||
ref = INTRA_FRAME;
|
||||
|
||||
/* Apply delta for reference frame */
|
||||
filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
|
||||
lvl_ref += mbd->ref_lf_deltas[ref];
|
||||
|
||||
/* Apply delta for mode */
|
||||
if (mbmi->ref_frame == INTRA_FRAME)
|
||||
/* Apply delta for Intra modes */
|
||||
mode = 0; /* B_PRED */
|
||||
/* Only the split mode BPRED has a further special case */
|
||||
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
|
||||
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
|
||||
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
|
||||
mode = 1; /* all the rest of Intra modes */
|
||||
lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; /* clamp */
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
|
||||
/* LAST, GOLDEN, ALT */
|
||||
for(ref = 1; ref < MAX_REF_FRAMES; ref++)
|
||||
{
|
||||
/* Only the split mode BPRED has a further special case */
|
||||
if (mbmi->mode == B_PRED)
|
||||
filter_level += mbd->mode_lf_deltas[0];
|
||||
int lvl_ref = lvl_seg;
|
||||
|
||||
/* Apply delta for reference frame */
|
||||
lvl_ref += mbd->ref_lf_deltas[ref];
|
||||
|
||||
/* Apply delta for Inter modes */
|
||||
for (mode = 1; mode < 4; mode++)
|
||||
{
|
||||
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
|
||||
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
|
||||
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Zero motion mode */
|
||||
if (mbmi->mode == ZEROMV)
|
||||
filter_level += mbd->mode_lf_deltas[1];
|
||||
|
||||
/* Split MB motion mode */
|
||||
else if (mbmi->mode == SPLITMV)
|
||||
filter_level += mbd->mode_lf_deltas[3];
|
||||
|
||||
/* All other inter motion modes (Nearest, Near, New) */
|
||||
else
|
||||
filter_level += mbd->mode_lf_deltas[2];
|
||||
}
|
||||
|
||||
/* Range check */
|
||||
if (filter_level > MAX_LOOP_FILTER)
|
||||
filter_level = MAX_LOOP_FILTER;
|
||||
else if (filter_level < 0)
|
||||
filter_level = 0;
|
||||
}
|
||||
return filter_level;
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_frame
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
@ -290,49 +288,23 @@ void vp8_loop_filter_frame
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
|
||||
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
|
||||
int i;
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
|
||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
/* Point at base of Mb MODE_INFO list */
|
||||
const MODE_INFO *mode_info_context = cm->mi;
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl, cm->sharpness_level);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer;
|
||||
@ -344,51 +316,79 @@ void vp8_loop_filter_frame
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
||||
int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
|
||||
mbd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mbd->mode_info_context->mbmi.mb_skip_coeff);
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (!skip_lf)
|
||||
cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]);
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
|
||||
mbd->mode_info_context++; /* step to next MB */
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
u_ptr += post->uv_stride * 8 - post->uv_width;
|
||||
v_ptr += post->uv_stride * 8 - post->uv_width;
|
||||
|
||||
mbd->mode_info_context++; /* Skip border mb */
|
||||
mode_info_context++; /* Skip border mb */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_frame_yonly
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
@ -399,49 +399,28 @@ void vp8_loop_filter_frame_yonly
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
|
||||
int i;
|
||||
unsigned char *y_ptr;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
(void) sharpness_lvl;
|
||||
/* Point at base of Mb MODE_INFO list */
|
||||
const MODE_INFO *mode_info_context = cm->mi;
|
||||
|
||||
/*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
|
||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
||||
sharpness_lvl = cm->sharpness_level;
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
#if 0
|
||||
if(default_filt_lvl == 0) /* no filter applied */
|
||||
return;
|
||||
#endif
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl, sharpness_lvl);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer;
|
||||
@ -451,44 +430,75 @@ void vp8_loop_filter_frame_yonly
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
||||
int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
|
||||
mbd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mbd->mode_info_context->mbmi.mb_skip_coeff);
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
/* Apply any context driven MB level adjustment */
|
||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (!skip_lf)
|
||||
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
mbd->mode_info_context ++; /* step to next MB */
|
||||
mode_info_context ++; /* step to next MB */
|
||||
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
mbd->mode_info_context ++; /* Skip border mb */
|
||||
mode_info_context ++; /* Skip border mb */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_partial_frame
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
@ -500,25 +510,32 @@ void vp8_loop_filter_partial_frame
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
|
||||
int i;
|
||||
unsigned char *y_ptr;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
/*int mb_rows = post->y_height >> 4;*/
|
||||
int mb_cols = post->y_width >> 4;
|
||||
|
||||
int linestocopy;
|
||||
int linestocopy, i;
|
||||
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
(void) sharpness_lvl;
|
||||
const MODE_INFO *mode_info_context;
|
||||
|
||||
/*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
|
||||
mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */
|
||||
int lvl_seg[MAX_MB_SEGMENTS];
|
||||
|
||||
sharpness_lvl = cm->sharpness_level;
|
||||
|
||||
#if 0
|
||||
if(default_filt_lvl == 0) /* no filter applied */
|
||||
return;
|
||||
#endif
|
||||
|
||||
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
|
||||
|
||||
linestocopy = (post->y_height >> (4 + Fraction));
|
||||
|
||||
@ -531,29 +548,24 @@ void vp8_loop_filter_partial_frame
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
/* Abs value */
|
||||
{ /* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
{
|
||||
lvl_seg[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
}
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
lvl_seg[i] = default_filt_lvl
|
||||
+ mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
lvl_seg[i] = (lvl_seg[i] > 0) ?
|
||||
((lvl_seg[i] > 63) ? 63: lvl_seg[i]) : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
lvl_seg[0] = default_filt_lvl;
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
|
||||
@ -563,32 +575,64 @@ void vp8_loop_filter_partial_frame
|
||||
{
|
||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||
{
|
||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
||||
int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
|
||||
mbd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mbd->mode_info_context->mbmi.mb_skip_coeff);
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
if (alt_flt_enabled)
|
||||
filter_level = lvl_seg[mode_info_context->mbmi.segment_id];
|
||||
else
|
||||
filter_level = lvl_seg[0];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (!skip_lf)
|
||||
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]);
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
mbd->mode_info_context += 1; /* step to next MB */
|
||||
mode_info_context += 1; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
mbd->mode_info_context += 1; /* Skip border mb */
|
||||
mode_info_context += 1; /* Skip border mb */
|
||||
}
|
||||
}
|
||||
|
@ -13,6 +13,7 @@
|
||||
#define loopfilter_h
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_config.h"
|
||||
|
||||
#define MAX_LOOP_FILTER 63
|
||||
|
||||
@ -22,27 +23,46 @@ typedef enum
|
||||
SIMPLE_LOOPFILTER = 1
|
||||
} LOOPFILTERTYPE;
|
||||
|
||||
/* FRK
|
||||
* Need to align this structure so when it is declared and
|
||||
#if ARCH_ARM
|
||||
#define SIMD_WIDTH 1
|
||||
#else
|
||||
#define SIMD_WIDTH 16
|
||||
#endif
|
||||
|
||||
/* Need to align this structure so when it is declared and
|
||||
* passed it can be loaded into vector registers.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(16, signed char, lim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, flim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, thr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, mbflim[16]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
|
||||
unsigned char lvl[4][4][4];
|
||||
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
|
||||
unsigned char mode_lf_lut[10];
|
||||
} loop_filter_info_n;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const unsigned char * mblim;
|
||||
const unsigned char * blim;
|
||||
const unsigned char * lim;
|
||||
const unsigned char * hev_thr;
|
||||
} loop_filter_info;
|
||||
|
||||
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const signed char *flimit,\
|
||||
const signed char *limit, const signed char *thresh, int count)
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
|
||||
#define prototype_loopfilter_block(sym) \
|
||||
void sym(unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
void sym(unsigned char *y, unsigned char *u, unsigned char *v, \
|
||||
int ystride, int uv_stride, loop_filter_info *lfi)
|
||||
|
||||
#define prototype_simple_loopfilter(sym) \
|
||||
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "x86/loopfilter_x86.h"
|
||||
#endif
|
||||
@ -71,38 +91,39 @@ extern prototype_loopfilter_block(vp8_lf_normal_mb_h);
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_normal_b_h);
|
||||
|
||||
|
||||
#ifndef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_c
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_c
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_simple_mb_v);
|
||||
extern prototype_simple_loopfilter(vp8_lf_simple_mb_v);
|
||||
|
||||
#ifndef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_c
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_simple_b_v);
|
||||
extern prototype_simple_loopfilter(vp8_lf_simple_b_v);
|
||||
|
||||
#ifndef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_c
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_c
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_simple_mb_h);
|
||||
extern prototype_simple_loopfilter(vp8_lf_simple_mb_h);
|
||||
|
||||
#ifndef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_c
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_simple_b_h);
|
||||
extern prototype_simple_loopfilter(vp8_lf_simple_b_h);
|
||||
|
||||
typedef prototype_loopfilter_block((*vp8_lf_block_fn_t));
|
||||
typedef prototype_simple_loopfilter((*vp8_slf_block_fn_t));
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_lf_block_fn_t normal_mb_v;
|
||||
vp8_lf_block_fn_t normal_b_v;
|
||||
vp8_lf_block_fn_t normal_mb_h;
|
||||
vp8_lf_block_fn_t normal_b_h;
|
||||
vp8_lf_block_fn_t simple_mb_v;
|
||||
vp8_lf_block_fn_t simple_b_v;
|
||||
vp8_lf_block_fn_t simple_mb_h;
|
||||
vp8_lf_block_fn_t simple_b_h;
|
||||
vp8_slf_block_fn_t simple_mb_v;
|
||||
vp8_slf_block_fn_t simple_b_v;
|
||||
vp8_slf_block_fn_t simple_mb_h;
|
||||
vp8_slf_block_fn_t simple_b_h;
|
||||
} vp8_loopfilter_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@ -115,9 +136,9 @@ typedef void loop_filter_uvfunction
|
||||
(
|
||||
unsigned char *u, /* source pointer */
|
||||
int p, /* pitch */
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
unsigned char *v
|
||||
);
|
||||
|
||||
|
@ -24,8 +24,9 @@ static __inline signed char vp8_signed_char_clamp(int t)
|
||||
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
|
||||
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
|
||||
static __inline signed char vp8_filter_mask(uc limit, uc blimit,
|
||||
uc p3, uc p2, uc p1, uc p0,
|
||||
uc q0, uc q1, uc q2, uc q3)
|
||||
{
|
||||
signed char mask = 0;
|
||||
mask |= (abs(p3 - p2) > limit) * -1;
|
||||
@ -34,13 +35,13 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
|
||||
mask |= (abs(q1 - q0) > limit) * -1;
|
||||
mask |= (abs(q2 - q1) > limit) * -1;
|
||||
mask |= (abs(q3 - q2) > limit) * -1;
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
|
||||
mask = ~mask;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
signed char hev = 0;
|
||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||
@ -48,7 +49,8 @@ static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0,
|
||||
return hev;
|
||||
}
|
||||
|
||||
static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
static __inline void vp8_filter(signed char mask, uc hev, uc *op1,
|
||||
uc *op0, uc *oq0, uc *oq1)
|
||||
|
||||
{
|
||||
signed char ps0, qs0;
|
||||
@ -98,9 +100,9 @@ void vp8_loop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p, /* pitch */
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
@ -113,11 +115,11 @@ void vp8_loop_filter_horizontal_edge_c
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
|
||||
@ -130,9 +132,9 @@ void vp8_loop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
@ -145,10 +147,10 @@ void vp8_loop_filter_vertical_edge_c
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
|
||||
|
||||
@ -157,7 +159,7 @@ void vp8_loop_filter_vertical_edge_c
|
||||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
static __inline void vp8_mbfilter(signed char mask, signed char hev,
|
||||
static __inline void vp8_mbfilter(signed char mask, uc hev,
|
||||
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
||||
{
|
||||
signed char s, u;
|
||||
@ -216,9 +218,9 @@ void vp8_mbloop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
@ -232,11 +234,11 @@ void vp8_mbloop_filter_horizontal_edge_c
|
||||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
|
||||
|
||||
@ -251,9 +253,9 @@ void vp8_mbloop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
@ -264,10 +266,10 @@ void vp8_mbloop_filter_vertical_edge_c
|
||||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
||||
|
||||
@ -278,13 +280,13 @@ void vp8_mbloop_filter_vertical_edge_c
|
||||
}
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
|
||||
static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
/* Why does this cause problems for win32?
|
||||
* error C2143: syntax error : missing ';' before 'type'
|
||||
* (void) limit;
|
||||
*/
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
|
||||
return mask;
|
||||
}
|
||||
|
||||
@ -317,47 +319,37 @@ void vp8_loop_filter_simple_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
(void) thresh;
|
||||
|
||||
do
|
||||
{
|
||||
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
|
||||
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
while (++i < 16);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
(void) thresh;
|
||||
|
||||
do
|
||||
{
|
||||
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
|
||||
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
|
||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
while (++i < 16);
|
||||
|
||||
}
|
||||
|
@ -83,6 +83,7 @@ typedef struct VP8_COMMON_RTCD
|
||||
} VP8_COMMON_RTCD;
|
||||
|
||||
typedef struct VP8Common
|
||||
|
||||
{
|
||||
struct vpx_internal_error_info error;
|
||||
|
||||
@ -107,7 +108,8 @@ typedef struct VP8Common
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for loopfilter init checking and motion search. */
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
@ -149,11 +151,9 @@ typedef struct VP8Common
|
||||
INTERPOLATIONFILTERTYPE mcomp_filter_type;
|
||||
LOOPFILTERTYPE last_filter_type;
|
||||
LOOPFILTERTYPE filter_type;
|
||||
loop_filter_info lf_info[MAX_LOOP_FILTER+1];
|
||||
prototype_loopfilter_block((*lf_mbv));
|
||||
prototype_loopfilter_block((*lf_mbh));
|
||||
prototype_loopfilter_block((*lf_bv));
|
||||
prototype_loopfilter_block((*lf_bh));
|
||||
|
||||
loop_filter_info_n lf_info;
|
||||
|
||||
int filter_level;
|
||||
int last_sharpness_level;
|
||||
int sharpness_level;
|
||||
@ -206,10 +206,9 @@ typedef struct VP8Common
|
||||
struct postproc_state postproc_state;
|
||||
} VP8_COMMON;
|
||||
|
||||
|
||||
int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level);
|
||||
void vp8_init_loop_filter(VP8_COMMON *cm);
|
||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
|
||||
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
|
||||
void vp8_loop_filter_init(VP8_COMMON *cm);
|
||||
void vp8_loop_filter_frame_init(VP8_COMMON *cm, MACROBLOCKD *mbd,
|
||||
int default_filt_lvl, int sharpness_lvl);
|
||||
void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
|
||||
|
||||
#endif
|
||||
|
@ -16,7 +16,7 @@
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -122,12 +122,10 @@ next8_h:
|
||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
movq mm2, [rdx] ; flimit mm2
|
||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
||||
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
|
||||
mov rdx, arg(2) ;blimit ; get blimit
|
||||
movq mm7, [rdx] ; blimit
|
||||
|
||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por mm1, mm5
|
||||
pxor mm5, mm5
|
||||
pcmpeqb mm1, mm5 ; mask mm1
|
||||
@ -230,7 +228,7 @@ next8_h:
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -406,9 +404,9 @@ next8_v:
|
||||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ;
|
||||
mov rdx, arg(2) ;blimit ;
|
||||
|
||||
movq mm2, [rdx] ;flimit mm2
|
||||
movq mm4, [rdx] ;blimit
|
||||
movq mm1, mm3 ; mm1=mm3=p0
|
||||
|
||||
movq mm7, mm6 ; mm7=mm6=q0
|
||||
@ -419,10 +417,7 @@ next8_v:
|
||||
paddusb mm1, mm1 ; abs(q0-p0)*2
|
||||
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
||||
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por mm1, mm0; ; mask
|
||||
|
||||
pxor mm0, mm0
|
||||
@ -603,7 +598,7 @@ next8_v:
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -719,17 +714,15 @@ next8_mbh:
|
||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
movq mm2, [rdx] ; flimit mm2
|
||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
||||
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
|
||||
mov rdx, arg(2) ;blimit ; get blimit
|
||||
movq mm7, [rdx] ; blimit
|
||||
|
||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por mm1, mm5
|
||||
pxor mm5, mm5
|
||||
pcmpeqb mm1, mm5 ; mask mm1
|
||||
|
||||
; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
|
||||
; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
|
||||
; mm6 = p0,
|
||||
|
||||
; calculate high edge variance
|
||||
@ -922,7 +915,7 @@ next8_mbh:
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -1108,9 +1101,9 @@ next8_mbv:
|
||||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ;
|
||||
mov rdx, arg(2) ;blimit ;
|
||||
|
||||
movq mm2, [rdx] ;flimit mm2
|
||||
movq mm4, [rdx] ;blimit
|
||||
movq mm1, mm3 ; mm1=mm3=p0
|
||||
|
||||
movq mm7, mm6 ; mm7=mm6=q0
|
||||
@ -1121,10 +1114,7 @@ next8_mbv:
|
||||
paddusb mm1, mm1 ; abs(q0-p0)*2
|
||||
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
||||
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por mm1, mm0; ; mask
|
||||
|
||||
pxor mm0, mm0
|
||||
@ -1392,16 +1382,13 @@ next8_mbv:
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
; const char *blimit
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
|
||||
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||
|
||||
movsxd rcx, dword ptr arg(5) ;count
|
||||
mov rcx, 2 ; count
|
||||
nexts8_h:
|
||||
mov rdx, arg(3) ;limit
|
||||
movq mm7, [rdx]
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
mov rdx, arg(2) ;blimit ; get blimit
|
||||
movq mm3, [rdx] ;
|
||||
paddb mm3, mm3 ; flimit*2 (less than 255)
|
||||
paddb mm3, mm7 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||
add rdi, rax
|
||||
@ -1445,7 +1428,7 @@ nexts8_h:
|
||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
pxor mm3, mm3
|
||||
pcmpeqb mm5, mm3
|
||||
|
||||
@ -1515,16 +1498,13 @@ nexts8_h:
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
; const char *blimit
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
|
||||
sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||
|
||||
lea rsi, [rsi + rax*4- 2]; ;
|
||||
movsxd rcx, dword ptr arg(5) ;count
|
||||
mov rcx, 2 ; count
|
||||
nexts8_v:
|
||||
|
||||
lea rdi, [rsi + rax];
|
||||
@ -1602,14 +1582,10 @@ nexts8_v:
|
||||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
mov rdx, arg(2) ;blimit ; get blimit
|
||||
movq mm7, [rdx]
|
||||
mov rdx, arg(3) ; get limit
|
||||
movq mm6, [rdx]
|
||||
paddb mm7, mm7 ; flimit*2 (less than 255)
|
||||
paddb mm7, mm6 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
pxor mm7, mm7
|
||||
pcmpeqb mm5, mm7 ; mm5 = mask
|
||||
|
||||
|
@ -110,7 +110,7 @@
|
||||
psubusb xmm6, xmm5 ; p1-=p0
|
||||
|
||||
por xmm6, xmm4 ; abs(p1 - p0)
|
||||
mov rdx, arg(2) ; get flimit
|
||||
mov rdx, arg(2) ; get blimit
|
||||
|
||||
movdqa t1, xmm6 ; save to t1
|
||||
|
||||
@ -123,7 +123,7 @@
|
||||
psubusb xmm1, xmm7
|
||||
por xmm2, xmm3 ; abs(p1-q1)
|
||||
|
||||
movdqa xmm4, XMMWORD PTR [rdx] ; flimit
|
||||
movdqa xmm7, XMMWORD PTR [rdx] ; blimit
|
||||
|
||||
movdqa xmm3, xmm0 ; q0
|
||||
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
@ -134,13 +134,11 @@
|
||||
psrlw xmm2, 1 ; abs(p1-q1)/2
|
||||
|
||||
psubusb xmm5, xmm3 ; p0-=q0
|
||||
paddb xmm4, xmm4 ; flimit*2 (less than 255)
|
||||
|
||||
psubusb xmm3, xmm6 ; q0-=p0
|
||||
por xmm5, xmm3 ; abs(p0 - q0)
|
||||
|
||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||
paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
movdqa xmm4, t0 ; hev get abs (q1 - q0)
|
||||
|
||||
@ -150,7 +148,7 @@
|
||||
|
||||
movdqa xmm2, XMMWORD PTR [rdx] ; hev
|
||||
|
||||
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
psubusb xmm4, xmm2 ; hev
|
||||
|
||||
psubusb xmm3, xmm2 ; hev
|
||||
@ -278,7 +276,7 @@
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
|
||||
;(
|
||||
; unsigned char *u,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
||||
movdqa xmm4, XMMWORD PTR [rdx]; limit
|
||||
|
||||
pmaxub xmm0, xmm7
|
||||
mov rdx, arg(2) ; flimit
|
||||
mov rdx, arg(2) ; blimit
|
||||
|
||||
psubusb xmm0, xmm4
|
||||
movdqa xmm5, xmm2 ; q1
|
||||
@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
||||
psrlw xmm5, 1 ; abs(p1-q1)/2
|
||||
psubusb xmm6, xmm3 ; q0-p0
|
||||
|
||||
movdqa xmm2, XMMWORD PTR [rdx]; flimit
|
||||
movdqa xmm4, XMMWORD PTR [rdx]; blimit
|
||||
|
||||
mov rdx, arg(4) ; get thresh
|
||||
|
||||
por xmm1, xmm6 ; abs(q0-p0)
|
||||
paddb xmm2, xmm2 ; flimit*2 (less than 255)
|
||||
|
||||
movdqa xmm6, t0 ; get abs (q1 - q0)
|
||||
|
||||
@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
||||
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
|
||||
|
||||
paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
|
||||
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
|
||||
|
||||
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
|
||||
|
||||
por xmm1, xmm0 ; mask
|
||||
@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
|
||||
;(
|
||||
; unsigned char *u,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
|
||||
;(
|
||||
; unsigned char *u,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
@ -1376,16 +1372,13 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
; const char *blimit,
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
|
||||
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
mov rdx, arg(2) ;blimit
|
||||
movdqa xmm3, XMMWORD PTR [rdx]
|
||||
mov rdx, arg(3) ;limit
|
||||
movdqa xmm7, XMMWORD PTR [rdx]
|
||||
|
||||
paddb xmm3, xmm3 ; flimit*2 (less than 255)
|
||||
paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||
add rdi, rax
|
||||
@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
pxor xmm3, xmm3
|
||||
pcmpeqb xmm5, xmm3
|
||||
|
||||
@ -1493,16 +1481,13 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
; const char *blimit,
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
|
||||
sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
||||
push rbp ; save old base pointer value.
|
||||
mov rbp, rsp ; set new base pointer value.
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx ; save callee-saved reg
|
||||
push rsi
|
||||
@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit
|
||||
mov rdx, arg(2) ;blimit
|
||||
movdqa xmm7, XMMWORD PTR [rdx]
|
||||
mov rdx, arg(3) ; get limit
|
||||
movdqa xmm6, XMMWORD PTR [rdx]
|
||||
paddb xmm7, xmm7 ; flimit*2 (less than 255)
|
||||
paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
pxor xmm7, xmm7
|
||||
pcmpeqb xmm5, xmm7 ; mm5 = mask
|
||||
|
||||
|
@ -9,30 +9,18 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
||||
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
||||
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
||||
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
|
||||
|
||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
||||
@ -44,23 +32,13 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
|
||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
@ -68,23 +46,13 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
@ -92,27 +60,23 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
|
||||
@ -120,27 +84,23 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -150,20 +110,10 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
@ -171,20 +121,10 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
@ -192,24 +132,20 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
|
||||
@ -217,36 +153,20 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
void vp8_fast_loop_filter_vertical_edges_sse(unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
#endif
|
||||
|
@ -24,10 +24,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_mmx);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_mmx);
|
||||
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
@ -44,13 +44,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
|
||||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
|
||||
|
||||
#undef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_mmx
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_mmx
|
||||
|
||||
#undef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
|
||||
|
||||
#undef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_mmx
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_mmx
|
||||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
|
||||
@ -63,10 +63,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_sse2);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_sse2);
|
||||
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
@ -83,13 +83,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
|
||||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
|
||||
|
||||
#undef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_sse2
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_sse2
|
||||
|
||||
#undef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
|
||||
|
||||
#undef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_sse2
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_sse2
|
||||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2
|
||||
|
@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_ports/x86.h"
|
||||
#include "vp8/common/g_common.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
@ -63,9 +63,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
@ -101,9 +101,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
||||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
|
@ -180,11 +180,11 @@ static MB_PREDICTION_MODE read_mv_ref(vp8_reader *bc, const vp8_prob *p)
|
||||
return (MB_PREDICTION_MODE)i;
|
||||
}
|
||||
|
||||
static MB_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
|
||||
static B_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
|
||||
{
|
||||
const int i = vp8_treed_read(bc, vp8_sub_mv_ref_tree, p);
|
||||
|
||||
return (MB_PREDICTION_MODE)i;
|
||||
return (B_PREDICTION_MODE)i;
|
||||
}
|
||||
|
||||
#ifdef VPX_MODE_COUNT
|
||||
@ -376,7 +376,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
|
||||
abovemv.as_int = above_block_mv(mi, k, mis);
|
||||
mv_contz = vp8_mv_cont(&leftmv, &abovemv);
|
||||
|
||||
switch ((B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
|
||||
switch (sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
|
||||
{
|
||||
case NEW4X4:
|
||||
read_mv(bc, &blockmv.as_mv, (const MV_CONTEXT *) mvc);
|
||||
|
@ -94,7 +94,7 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
|
||||
{
|
||||
VP8_COMMON *cm = &pbi->common;
|
||||
|
||||
vp8_init_loop_filter(cm);
|
||||
vp8_loop_filter_init(cm);
|
||||
cm->last_frame_type = KEY_FRAME;
|
||||
cm->last_filter_type = cm->filter_type;
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
|
@ -274,9 +274,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
|
||||
int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
|
||||
|
||||
int filter_level;
|
||||
loop_filter_info *lfi = pc->lf_info;
|
||||
int alt_flt_enabled = xd->segmentation_enabled;
|
||||
int Segment;
|
||||
loop_filter_info_n *lfi_n = &pc->lf_info;
|
||||
|
||||
pbi->mb_row_di[ithread].mb_row = mb_row;
|
||||
pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part];
|
||||
@ -362,7 +360,16 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
|
||||
|
||||
if (pbi->common.filter_level)
|
||||
{
|
||||
int skip_lf;
|
||||
int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
|
||||
xd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||
xd->mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
|
||||
const int seg = xd->mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if( mb_row != pc->mb_rows-1 )
|
||||
{
|
||||
/* Save decoded MB last row data for next-row decoding */
|
||||
@ -388,35 +395,57 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
|
||||
}
|
||||
}
|
||||
|
||||
/* update loopfilter info */
|
||||
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
|
||||
skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
|
||||
xd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||
xd->mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
filter_level = pbi->mt_baseline_filter_level[Segment];
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
||||
|
||||
/* loopfilter on this macroblock. */
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
||||
if(pc->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
loop_filter_info lfi;
|
||||
FRAME_TYPE frame_type = pc->frame_type;
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (!skip_lf)
|
||||
pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v)
|
||||
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v)
|
||||
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h)
|
||||
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h)
|
||||
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v)
|
||||
(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v)
|
||||
(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h)
|
||||
(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h)
|
||||
(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
recon_yoffset += 16;
|
||||
@ -681,53 +710,6 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
|
||||
{
|
||||
VP8_COMMON *cm = &pbi->common;
|
||||
MACROBLOCKD *mbd = &pbi->mb;
|
||||
/*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
/*int mb_row;
|
||||
int mb_col;
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];*/
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
|
||||
int i;
|
||||
/*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
pbi->mt_baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
{
|
||||
int mb_row;
|
||||
@ -738,12 +720,10 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
volatile int *last_row_current_mb_col = NULL;
|
||||
int nsync = pbi->sync_range;
|
||||
|
||||
int filter_level;
|
||||
loop_filter_info *lfi = pc->lf_info;
|
||||
int alt_flt_enabled = xd->segmentation_enabled;
|
||||
int Segment;
|
||||
int filter_level = pc->filter_level;
|
||||
loop_filter_info_n *lfi_n = &pc->lf_info;
|
||||
|
||||
if(pbi->common.filter_level)
|
||||
if (filter_level)
|
||||
{
|
||||
/* Set above_row buffer to 127 for decoding first MB row */
|
||||
vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
|
||||
@ -764,7 +744,9 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
|
||||
vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
|
||||
}
|
||||
lpf_init(pbi, pc->filter_level);
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level, pc->sharpness_level);
|
||||
}
|
||||
|
||||
setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
|
||||
@ -774,7 +756,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
|
||||
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
|
||||
{
|
||||
|
||||
xd->current_bc = &pbi->mbc[mb_row%num_part];
|
||||
|
||||
/* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
|
||||
@ -875,7 +856,16 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
|
||||
if (pbi->common.filter_level)
|
||||
{
|
||||
int skip_lf;
|
||||
int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
|
||||
xd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||
xd->mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
|
||||
const int seg = xd->mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
/* Save decoded MB last row data for next-row decoding */
|
||||
if(mb_row != pc->mb_rows-1)
|
||||
{
|
||||
@ -901,36 +891,58 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
}
|
||||
}
|
||||
|
||||
/* update loopfilter info */
|
||||
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
|
||||
skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
|
||||
xd->mode_info_context->mbmi.mode != SPLITMV &&
|
||||
xd->mode_info_context->mbmi.mb_skip_coeff);
|
||||
filter_level = pbi->mt_baseline_filter_level[Segment];
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
||||
|
||||
/* loopfilter on this macroblock. */
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
||||
if(pc->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
loop_filter_info lfi;
|
||||
FRAME_TYPE frame_type = pc->frame_type;
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (!skip_lf)
|
||||
pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v)
|
||||
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v)
|
||||
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h)
|
||||
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h)
|
||||
(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v)
|
||||
(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v)
|
||||
(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h)
|
||||
(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h)
|
||||
(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
recon_yoffset += 16;
|
||||
recon_uvoffset += 8;
|
||||
|
||||
|
@ -2170,7 +2170,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
//when needed. This will avoid unnecessary calls of vp8cx_init_quantizer() for every frame.
|
||||
vp8cx_init_quantizer(cpi);
|
||||
{
|
||||
vp8_init_loop_filter(cm);
|
||||
vp8_loop_filter_init(cm);
|
||||
cm->last_frame_type = KEY_FRAME;
|
||||
cm->last_filter_type = cm->filter_type;
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
|
@ -262,10 +262,19 @@ static void vp8_temporal_filter_iterate_c
|
||||
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
||||
{
|
||||
#if ALT_REF_MC_ENABLED
|
||||
// Reduced search extent by 3 for 6-tap filter & smaller UMV border
|
||||
cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
|
||||
// Source frames are extended to 16 pixels. This is different than
|
||||
// L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS)
|
||||
// A 6 tap filter is used for motion search. This requires 2 pixels
|
||||
// before and 3 pixels after. So the largest Y mv on a border would
|
||||
// then be 16 - 3. The UV blocks are half the size of the Y and
|
||||
// therefore only extended by 8. The largest mv that a UV block
|
||||
// can support is 8 - 3. A UV mv is half of a Y mv.
|
||||
// (16 - 3) >> 1 == 6 which is greater than 8 - 3.
|
||||
// To keep the mv in play for both Y and UV planes the max that it
|
||||
// can be on a border is therefore 16 - 5.
|
||||
cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5));
|
||||
cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
|
||||
+ (VP8BORDERINPIXELS - 19);
|
||||
+ (16 - 5);
|
||||
#endif
|
||||
|
||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||
@ -277,10 +286,9 @@ static void vp8_temporal_filter_iterate_c
|
||||
vpx_memset(count, 0, 384*sizeof(unsigned short));
|
||||
|
||||
#if ALT_REF_MC_ENABLED
|
||||
// Reduced search extent by 3 for 6-tap filter & smaller UMV border
|
||||
cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
|
||||
cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
|
||||
cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
|
||||
+ (VP8BORDERINPIXELS - 19);
|
||||
+ (16 - 5);
|
||||
#endif
|
||||
|
||||
for (frame = 0; frame < frame_count; frame++)
|
||||
|
@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __VPX_MEM_NDS_H__
|
||||
#define __VPX_MEM_NDS_H__
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <nitro.h>
|
||||
#include <nitro/os.h>
|
||||
|
||||
void *vpx_mem_nds_alloc(osarena_id id, osheap_handle handle, size_t size, size_t align);
|
||||
void vpx_mem_nds_free(osarena_id id, osheap_handle handle, void *mem);
|
||||
int vpx_nds_alloc_heap(osarena_id id, u32 size);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /*__VPX_MEM_NDS_H__*/
|
@ -36,9 +36,6 @@
|
||||
# include <winbase.h>
|
||||
#elif defined(VXWORKS)
|
||||
# include <sem_lib.h>
|
||||
#elif defined(NDS_NITRO)
|
||||
# include <nitro.h>
|
||||
# include <nitro/os.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
@ -112,8 +109,6 @@ struct memory_tracker
|
||||
HANDLE mutex;
|
||||
#elif defined(VXWORKS)
|
||||
SEM_ID mutex;
|
||||
#elif defined(NDS_NITRO)
|
||||
OSMutex mutex;
|
||||
#elif defined(NO_MUTEX)
|
||||
#else
|
||||
#error "No mutex type defined for this platform!"
|
||||
@ -193,9 +188,6 @@ int vpx_memory_tracker_init(int padding_size, int pad_value)
|
||||
memtrack.mutex = sem_bcreate(SEM_Q_FIFO, /*SEM_Q_FIFO non-priority based mutex*/
|
||||
SEM_FULL); /*SEM_FULL initial state is unlocked*/
|
||||
ret = !memtrack.mutex;
|
||||
#elif defined(NDS_NITRO)
|
||||
os_init_mutex(&memtrack.mutex);
|
||||
ret = 0;
|
||||
#elif defined(NO_MUTEX)
|
||||
ret = 0;
|
||||
#endif
|
||||
@ -251,9 +243,7 @@ void vpx_memory_tracker_destroy()
|
||||
|
||||
if (!g_logging.type && g_logging.file && g_logging.file != stderr)
|
||||
{
|
||||
#if !defined(NDS_NITRO)
|
||||
fclose(g_logging.file);
|
||||
#endif
|
||||
g_logging.file = NULL;
|
||||
}
|
||||
|
||||
@ -368,15 +358,12 @@ int vpx_memory_tracker_set_log_type(int type, char *option)
|
||||
g_logging.file = stderr;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
#if !defined(NDS_NITRO)
|
||||
else
|
||||
{
|
||||
if ((g_logging.file = fopen((char *)option, "w")))
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
break;
|
||||
#if defined(WIN32) && !defined(_WIN32_WCE)
|
||||
case 1:
|
||||
@ -506,12 +493,6 @@ static void memory_tracker_dump()
|
||||
p->addr, i, p->size,
|
||||
p->file, p->line);
|
||||
|
||||
#ifdef NDS_NITRO
|
||||
|
||||
if (!(i % 20)) os_sleep(500);
|
||||
|
||||
#endif
|
||||
|
||||
p = p->next;
|
||||
++i;
|
||||
}
|
||||
@ -719,9 +700,6 @@ static int memory_tracker_lock_mutex()
|
||||
ret = WaitForSingleObject(memtrack.mutex, INFINITE);
|
||||
#elif defined(VXWORKS)
|
||||
ret = sem_take(memtrack.mutex, WAIT_FOREVER);
|
||||
#elif defined(NDS_NITRO)
|
||||
os_lock_mutex(&memtrack.mutex);
|
||||
ret = 0;
|
||||
#endif
|
||||
|
||||
if (ret)
|
||||
@ -754,9 +732,6 @@ static int memory_tracker_unlock_mutex()
|
||||
ret = !ReleaseMutex(memtrack.mutex);
|
||||
#elif defined(VXWORKS)
|
||||
ret = sem_give(memtrack.mutex);
|
||||
#elif defined(NDS_NITRO)
|
||||
os_unlock_mutex(&memtrack.mutex);
|
||||
ret = 0;
|
||||
#endif
|
||||
|
||||
if (ret)
|
||||
|
@ -1,221 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : yv12extend.c
|
||||
*
|
||||
* Description :
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include <nitro.h>
|
||||
#include <nitro/mi.h>
|
||||
#include <nitro/itcm_begin.h>
|
||||
|
||||
//---- DMA Number
|
||||
#define DMA_NO 3
|
||||
|
||||
/****************************************************************************
|
||||
* Exports
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
****************************************************************************/
|
||||
void
|
||||
vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
|
||||
unsigned int Border;
|
||||
int plane_stride;
|
||||
int plane_height;
|
||||
int plane_width;
|
||||
|
||||
/***********/
|
||||
/* Y Plane */
|
||||
/***********/
|
||||
Border = ybf->border;
|
||||
plane_stride = ybf->y_stride;
|
||||
plane_height = ybf->y_height;
|
||||
plane_width = ybf->y_width;
|
||||
|
||||
// copy the left and right most columns out
|
||||
src_ptr1 = ybf->y_buffer;
|
||||
src_ptr2 = src_ptr1 + plane_width - 1;
|
||||
dest_ptr1 = src_ptr1 - Border;
|
||||
dest_ptr2 = src_ptr2 + 1;
|
||||
|
||||
for (i = 0; i < plane_height; i++)
|
||||
{
|
||||
mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
|
||||
mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
|
||||
src_ptr1 += plane_stride;
|
||||
src_ptr2 += plane_stride;
|
||||
dest_ptr1 += plane_stride;
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
|
||||
// Now copy the top and bottom source lines into each line of the respective borders
|
||||
src_ptr1 = ybf->y_buffer - Border;
|
||||
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
|
||||
dest_ptr1 = src_ptr1 - (Border * plane_stride);
|
||||
dest_ptr2 = src_ptr2 + plane_stride;
|
||||
|
||||
for (i = 0; i < (int)Border; i++)
|
||||
{
|
||||
mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
|
||||
mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
|
||||
dest_ptr1 += plane_stride;
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
|
||||
plane_stride /= 2;
|
||||
plane_height /= 2;
|
||||
plane_width /= 2;
|
||||
Border /= 2;
|
||||
|
||||
/***********/
|
||||
/* U Plane */
|
||||
/***********/
|
||||
|
||||
// copy the left and right most columns out
|
||||
src_ptr1 = ybf->u_buffer;
|
||||
src_ptr2 = src_ptr1 + plane_width - 1;
|
||||
dest_ptr1 = src_ptr1 - Border;
|
||||
dest_ptr2 = src_ptr2 + 1;
|
||||
|
||||
for (i = 0; i < plane_height; i++)
|
||||
{
|
||||
mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
|
||||
mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
|
||||
src_ptr1 += plane_stride;
|
||||
src_ptr2 += plane_stride;
|
||||
dest_ptr1 += plane_stride;
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
|
||||
// Now copy the top and bottom source lines into each line of the respective borders
|
||||
src_ptr1 = ybf->u_buffer - Border;
|
||||
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
|
||||
dest_ptr1 = src_ptr1 - (Border * plane_stride);
|
||||
dest_ptr2 = src_ptr2 + plane_stride;
|
||||
|
||||
for (i = 0; i < (int)(Border); i++)
|
||||
{
|
||||
mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
|
||||
mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
|
||||
dest_ptr1 += plane_stride;
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
|
||||
/***********/
|
||||
/* V Plane */
|
||||
/***********/
|
||||
|
||||
// copy the left and right most columns out
|
||||
src_ptr1 = ybf->v_buffer;
|
||||
src_ptr2 = src_ptr1 + plane_width - 1;
|
||||
dest_ptr1 = src_ptr1 - Border;
|
||||
dest_ptr2 = src_ptr2 + 1;
|
||||
|
||||
for (i = 0; i < plane_height; i++)
|
||||
{
|
||||
mi_cpu_fill8(dest_ptr1, src_ptr1[0], Border);
|
||||
mi_cpu_fill8(dest_ptr2, src_ptr2[0], Border);
|
||||
src_ptr1 += plane_stride;
|
||||
src_ptr2 += plane_stride;
|
||||
dest_ptr1 += plane_stride;
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
|
||||
// Now copy the top and bottom source lines into each line of the respective borders
|
||||
src_ptr1 = ybf->v_buffer - Border;
|
||||
src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
|
||||
dest_ptr1 = src_ptr1 - (Border * plane_stride);
|
||||
dest_ptr2 = src_ptr2 + plane_stride;
|
||||
|
||||
for (i = 0; i < (int)(Border); i++)
|
||||
{
|
||||
mi_cpu_copy_fast(src_ptr1, dest_ptr1, plane_stride);
|
||||
mi_cpu_copy_fast(src_ptr2, dest_ptr2, plane_stride);
|
||||
dest_ptr1 += plane_stride;
|
||||
dest_ptr2 += plane_stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : vp8_yv12_copy_frame
|
||||
*
|
||||
* INPUTS :
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Copies the source image into the destination image and
|
||||
* updates the destination's UMV borders.
|
||||
*
|
||||
* SPECIAL NOTES : The frames are assumed to be identical in size.
|
||||
*
|
||||
****************************************************************************/
|
||||
void
|
||||
vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc)
|
||||
{
|
||||
int yplane_size = (src_ybc->y_height + 2 * src_ybc->border) * (src_ybc->y_stride);
|
||||
int mem_size = (yplane_size * 3 / 2) + (src_ybc->y_stride * 2);
|
||||
|
||||
mi_cpu_copy_fast(src_ybc->buffer_alloc, dst_ybc->buffer_alloc, mem_size);
|
||||
|
||||
/* unsigned char *src_y, *dst_y;
|
||||
unsigned char *src_u, *dst_u;
|
||||
unsigned char *src_v, *dst_v;
|
||||
|
||||
int yheight, uv_height;
|
||||
int ystride, uv_stride;
|
||||
int border;
|
||||
int yoffset, uvoffset;
|
||||
|
||||
border = src_ybc->border;
|
||||
yheight = src_ybc->y_height;
|
||||
uv_height = src_ybc->uv_height;
|
||||
|
||||
ystride = src_ybc->y_stride;
|
||||
uv_stride = src_ybc->uv_stride;
|
||||
|
||||
yoffset = border * (ystride + 1);
|
||||
uvoffset = border/2 * (uv_stride + 1);
|
||||
|
||||
src_y = src_ybc->y_buffer - yoffset;
|
||||
dst_y = dst_ybc->y_buffer - yoffset;
|
||||
src_u = src_ybc->u_buffer - uvoffset;
|
||||
dst_u = dst_ybc->u_buffer - uvoffset;
|
||||
src_v = src_ybc->v_buffer - uvoffset;
|
||||
dst_v = dst_ybc->v_buffer - uvoffset;
|
||||
|
||||
mi_cpu_copy_fast (src_y, dst_y, ystride * (yheight + 2 * border));
|
||||
mi_cpu_copy_fast (src_u, dst_u, uv_stride * (uv_height + border));
|
||||
mi_cpu_copy_fast (src_v, dst_v, uv_stride * (uv_height + border));
|
||||
*/
|
||||
}
|
||||
|
||||
#include <nitro/itcm_end.h>
|
@ -24,9 +24,12 @@ vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
if (ybf)
|
||||
{
|
||||
duck_free(ybf->buffer_alloc);
|
||||
vpx_free(ybf->buffer_alloc);
|
||||
|
||||
ybf->buffer_alloc = 0;
|
||||
/* buffer_alloc isn't accessed by most functions. Rather y_buffer,
|
||||
u_buffer and v_buffer point to buffer_alloc and are used. Clear out
|
||||
all of this so that a freed pointer isn't inadvertently used */
|
||||
vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -44,38 +47,37 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int
|
||||
{
|
||||
/*NOTE:*/
|
||||
|
||||
int yplane_size = (height + 2 * border) * (width + 2 * border);
|
||||
int uvplane_size = ((1 + height) / 2 + border) * ((1 + width) / 2 + border);
|
||||
|
||||
if (ybf)
|
||||
{
|
||||
int uv_width = width >> 1;
|
||||
int uv_height = height >> 1;
|
||||
int yplane_size = (height + 2 * border) * (width + 2 * border);
|
||||
int uvplane_size = (uv_height + border) * (uv_width + border);
|
||||
|
||||
vp8_yv12_de_alloc_frame_buffer(ybf);
|
||||
|
||||
/* only support allocating buffers that have
|
||||
a height and width that are multiples of 16 */
|
||||
if ((width & 0xf) | (height & 0xf))
|
||||
return -3;
|
||||
|
||||
ybf->y_width = width;
|
||||
ybf->y_height = height;
|
||||
ybf->y_stride = width + 2 * border;
|
||||
|
||||
ybf->uv_width = (1 + width) / 2;
|
||||
ybf->uv_height = (1 + height) / 2;
|
||||
ybf->uv_stride = ybf->uv_width + border;
|
||||
ybf->uv_width = uv_width;
|
||||
ybf->uv_height = uv_height;
|
||||
ybf->uv_stride = uv_width + border;
|
||||
|
||||
ybf->border = border;
|
||||
ybf->frame_size = yplane_size + 2 * uvplane_size;
|
||||
|
||||
/* Added 2 extra lines to framebuffer so that copy12x12 doesn't fail
|
||||
* when we have a large motion vector in V on the last v block.
|
||||
* Note : We never use these pixels anyway so this doesn't hurt.
|
||||
*/
|
||||
ybf->buffer_alloc = (unsigned char *) duck_memalign(32, ybf->frame_size + (ybf->y_stride * 2) + 32, 0);
|
||||
ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
|
||||
|
||||
if (ybf->buffer_alloc == NULL)
|
||||
return -1;
|
||||
|
||||
ybf->y_buffer = ybf->buffer_alloc + (border * ybf->y_stride) + border;
|
||||
|
||||
if (yplane_size & 0xf)
|
||||
yplane_size += 16 - (yplane_size & 0xf);
|
||||
|
||||
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * ybf->uv_stride) + border / 2;
|
||||
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * ybf->uv_stride) + border / 2;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user