dirac: Fix mmx/sse haar wavelet compose
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
0dc22e92f4
commit
754539a409
@ -30,6 +30,8 @@ void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b
|
|||||||
void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
|
void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
|
||||||
void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
|
void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
|
||||||
void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
|
void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
|
||||||
|
void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
|
||||||
|
void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
|
||||||
\
|
\
|
||||||
static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
|
static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
|
||||||
{ \
|
{ \
|
||||||
@ -83,6 +85,28 @@ static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
|
|||||||
\
|
\
|
||||||
ff_vertical_compose_haar##ext(b0, b1, width_align); \
|
ff_vertical_compose_haar##ext(b0, b1, width_align); \
|
||||||
} \
|
} \
|
||||||
|
static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
{\
    /* Run ff_horizontal_compose_haar0i##ext over the row first, then   */\
    /* finish the trailing (w/2) % align output pairs in plain C.       */\
    /* NOTE(review): presumably the ff_ routine only completes the      */\
    /* align-multiple prefix of the high-pass stage -- confirm in asm.  */\
    const int half = w >> 1;\
    int i = half - (half & (align - 1));\
    ff_horizontal_compose_haar0i##ext(b, tmp, w);\
\
    while (i < half) {\
        const int even = 2 * i;\
        /* even sample is copied from tmp, odd sample is composed from  */\
        /* the second-half coefficient b[i + half] and tmp[i]           */\
        b[even]     = tmp[i];\
        b[even + 1] = COMPOSE_HAARiH0(b[i + half], tmp[i]);\
        i++;\
    }\
}\
|
||||||
|
static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
{\
    /* Same structure as the haar0i wrapper, but every output sample is */\
    /* rounded and halved: (v + 1) >> 1.                                */\
    /* NOTE(review): presumably the ff_ routine only completes the      */\
    /* align-multiple prefix of the high-pass stage -- confirm in asm.  */\
    const int half = w >> 1;\
    int i = half - (half & (align - 1));\
    ff_horizontal_compose_haar1i##ext(b, tmp, w);\
\
    while (i < half) {\
        const int even = 2 * i;\
        b[even]     = (tmp[i] + 1) >> 1;\
        b[even + 1] = (COMPOSE_HAARiH0(b[i + half], tmp[i]) + 1) >> 1;\
        i++;\
    }\
}\
|
||||||
\
|
\
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
@ -95,11 +119,6 @@ COMPOSE_VERTICAL(_sse2, 8)
|
|||||||
|
|
||||||
void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
|
void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
|
||||||
|
|
||||||
void ff_horizontal_compose_haar0i_mmx(IDWTELEM *b, IDWTELEM *tmp, int w);
|
|
||||||
void ff_horizontal_compose_haar1i_mmx(IDWTELEM *b, IDWTELEM *tmp, int w);
|
|
||||||
void ff_horizontal_compose_haar0i_sse2(IDWTELEM *b, IDWTELEM *tmp, int w);
|
|
||||||
void ff_horizontal_compose_haar1i_sse2(IDWTELEM *b, IDWTELEM *tmp, int w);
|
|
||||||
|
|
||||||
void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
|
void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
|
||||||
{
|
{
|
||||||
for (; x < w2; x++) {
|
for (; x < w2; x++) {
|
||||||
@ -108,22 +127,6 @@ void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Scalar tail of the Haar horizontal compose without output shift.
 *
 * For every index from the incoming x up to (but not including) w2, writes
 * the interleaved output pair:
 *   b[2*x]   = tmp[x]
 *   b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x])
 *
 * @param b   row buffer; read at b[x+w2], written at b[2*x] and b[2*x+1]
 * @param tmp buffer read at tmp[x]
 * @param w2  exclusive upper bound for x, also the offset of the second half of b
 * @param x   first index to process
 */
void ff_horizontal_compose_haar0i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
{
    while (x < w2) {
        IDWTELEM *out = b + 2 * x;

        out[0] = tmp[x];
        out[1] = COMPOSE_HAARiH0(b[x + w2], tmp[x]);
        x++;
    }
}
|
|
||||||
|
|
||||||
/**
 * Scalar tail of the Haar horizontal compose with a rounding shift by one.
 *
 * For every index from the incoming x up to (but not including) w2, writes
 * the interleaved output pair:
 *   b[2*x]   = (tmp[x] + 1) >> 1
 *   b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1) >> 1
 *
 * @param b   row buffer; read at b[x+w2], written at b[2*x] and b[2*x+1]
 * @param tmp buffer read at tmp[x]
 * @param w2  exclusive upper bound for x, also the offset of the second half of b
 * @param x   first index to process
 */
void ff_horizontal_compose_haar1i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
{
    while (x < w2) {
        IDWTELEM *out = b + 2 * x;

        out[0] = (tmp[x] + 1) >> 1;
        out[1] = (COMPOSE_HAARiH0(b[x + w2], tmp[x]) + 1) >> 1;
        x++;
    }
}
|
|
||||||
|
|
||||||
void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
|
void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
@ -148,11 +151,11 @@ void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
|
|||||||
break;
|
break;
|
||||||
case DWT_DIRAC_HAAR0:
|
case DWT_DIRAC_HAAR0:
|
||||||
d->vertical_compose = vertical_compose_haar_mmx;
|
d->vertical_compose = vertical_compose_haar_mmx;
|
||||||
d->horizontal_compose = ff_horizontal_compose_haar0i_mmx;
|
d->horizontal_compose = horizontal_compose_haar0i_mmx;
|
||||||
break;
|
break;
|
||||||
case DWT_DIRAC_HAAR1:
|
case DWT_DIRAC_HAAR1:
|
||||||
d->vertical_compose = vertical_compose_haar_mmx;
|
d->vertical_compose = vertical_compose_haar_mmx;
|
||||||
d->horizontal_compose = ff_horizontal_compose_haar1i_mmx;
|
d->horizontal_compose = horizontal_compose_haar1i_mmx;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -175,11 +178,11 @@ void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
|
|||||||
break;
|
break;
|
||||||
case DWT_DIRAC_HAAR0:
|
case DWT_DIRAC_HAAR0:
|
||||||
d->vertical_compose = vertical_compose_haar_sse2;
|
d->vertical_compose = vertical_compose_haar_sse2;
|
||||||
//MMXDISABLED d->horizontal_compose = ff_horizontal_compose_haar0i_sse2;
|
d->horizontal_compose = horizontal_compose_haar0i_sse2;
|
||||||
break;
|
break;
|
||||||
case DWT_DIRAC_HAAR1:
|
case DWT_DIRAC_HAAR1:
|
||||||
d->vertical_compose = vertical_compose_haar_sse2;
|
d->vertical_compose = vertical_compose_haar_sse2;
|
||||||
d->horizontal_compose = ff_horizontal_compose_haar1i_sse2;
|
d->horizontal_compose = horizontal_compose_haar1i_sse2;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,8 +22,6 @@
|
|||||||
%include "x86inc.asm"
|
%include "x86inc.asm"
|
||||||
|
|
||||||
cextern horizontal_compose_dd97i_end_c
|
cextern horizontal_compose_dd97i_end_c
|
||||||
cextern horizontal_compose_haar0i_end_c
|
|
||||||
cextern horizontal_compose_haar1i_end_c
|
|
||||||
|
|
||||||
SECTION_RODATA
|
SECTION_RODATA
|
||||||
pw_1: times 8 dw 1
|
pw_1: times 8 dw 1
|
||||||
@ -188,7 +186,7 @@ cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
|
|||||||
; void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *tmp, int width)
|
; void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *tmp, int width)
|
||||||
cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
|
cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
|
||||||
mov w2d, wd
|
mov w2d, wd
|
||||||
xor xd, xd
|
xor xq, xq
|
||||||
shr w2d, 1
|
shr w2d, 1
|
||||||
lea b_w2q, [bq+wq]
|
lea b_w2q, [bq+wq]
|
||||||
mova m3, [pw_1]
|
mova m3, [pw_1]
|
||||||
@ -199,13 +197,13 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
|
|||||||
psraw m1, 1
|
psraw m1, 1
|
||||||
psubw m0, m1
|
psubw m0, m1
|
||||||
mova [tmpq + 2*xq], m0
|
mova [tmpq + 2*xq], m0
|
||||||
add xd, mmsize/2
|
add xq, mmsize/2
|
||||||
cmp xd, w2d
|
cmp xq, w2q
|
||||||
jl .lowpass_loop
|
jl .lowpass_loop
|
||||||
|
|
||||||
xor xd, xd
|
xor xq, xq
|
||||||
and w2d, ~(mmsize/2 - 1)
|
and w2q, ~(mmsize/2 - 1)
|
||||||
cmp w2d, mmsize/2
|
cmp w2q, mmsize/2
|
||||||
jl .end
|
jl .end
|
||||||
|
|
||||||
.highpass_loop:
|
.highpass_loop:
|
||||||
@ -226,11 +224,11 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
|
|||||||
mova [bq+4*xq], m0
|
mova [bq+4*xq], m0
|
||||||
mova [bq+4*xq+mmsize], m2
|
mova [bq+4*xq+mmsize], m2
|
||||||
|
|
||||||
add xd, mmsize/2
|
add xq, mmsize/2
|
||||||
cmp xd, w2d
|
cmp xq, w2q
|
||||||
jl .highpass_loop
|
jl .highpass_loop
|
||||||
.end:
|
.end:
|
||||||
END_HORIZONTAL horizontal_compose_haar%2i_end_c
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user