Merge "Removing vp9_maskingmv.c and corresponding assembly file."
This commit is contained in:
commit
20986c81b3
@ -1,803 +0,0 @@
|
||||
/*
|
||||
============================================================================
|
||||
Name : vp9_maskingmv.c
|
||||
Author : jimbankoski
|
||||
Version :
|
||||
Copyright : Your copyright notice
|
||||
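Description : Experimental color-segmentation masked motion search (C reference code plus a test driver for the SSE mask/SAD/predictor routines)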
|
||||
============================================================================
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
unsigned int vp9_sad16x16_sse3(
|
||||
unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
int max_err);
|
||||
|
||||
int vp8_growmaskmb_sse3(
|
||||
unsigned char *om,
|
||||
unsigned char *nm);
|
||||
|
||||
void vp8_makemask_sse3(
|
||||
unsigned char *y,
|
||||
unsigned char *u,
|
||||
unsigned char *v,
|
||||
unsigned char *ym,
|
||||
int yp,
|
||||
int uvp,
|
||||
int ys,
|
||||
int us,
|
||||
int vs,
|
||||
int yt,
|
||||
int ut,
|
||||
int vt);
|
||||
|
||||
unsigned int vp9_sad16x16_unmasked_wmt(
|
||||
unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
unsigned char *mask);
|
||||
|
||||
unsigned int vp9_sad16x16_masked_wmt(
|
||||
unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
unsigned char *mask);
|
||||
|
||||
unsigned int vp8_masked_predictor_wmt(
|
||||
unsigned char *masked,
|
||||
unsigned char *unmasked,
|
||||
int src_stride,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_stride,
|
||||
unsigned char *mask);
|
||||
unsigned int vp8_masked_predictor_uv_wmt(
|
||||
unsigned char *masked,
|
||||
unsigned char *unmasked,
|
||||
int src_stride,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_stride,
|
||||
unsigned char *mask);
|
||||
unsigned int vp8_uv_from_y_mask(
|
||||
unsigned char *ymask,
|
||||
unsigned char *uvmask);
|
||||
int yp = 16;
|
||||
unsigned char sxy[] = {
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90
|
||||
};
|
||||
|
||||
unsigned char sts[] = {
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
};
|
||||
unsigned char str[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
};
|
||||
|
||||
unsigned char y[] = {
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
|
||||
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
|
||||
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
|
||||
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
|
||||
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
|
||||
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
|
||||
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
|
||||
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
|
||||
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
|
||||
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
|
||||
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
|
||||
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40
|
||||
};
|
||||
int uvp = 8;
|
||||
unsigned char u[] = {
|
||||
90, 80, 70, 70, 90, 90, 90, 17,
|
||||
90, 80, 70, 70, 90, 90, 90, 17,
|
||||
84, 70, 70, 90, 90, 90, 17, 17,
|
||||
84, 70, 70, 90, 90, 90, 17, 17,
|
||||
80, 70, 70, 90, 90, 90, 17, 17,
|
||||
90, 80, 70, 70, 90, 90, 90, 17,
|
||||
90, 80, 70, 70, 90, 90, 90, 17,
|
||||
90, 80, 70, 70, 90, 90, 90, 17
|
||||
};
|
||||
|
||||
unsigned char v[] = {
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80
|
||||
};
|
||||
|
||||
unsigned char ym[256];
|
||||
unsigned char uvm[64];
|
||||
/*
 * One color segment: a center value and a tolerance for each of the Y, U
 * and V planes.  A pixel matches when every plane is strictly closer to the
 * center than that plane's tolerance (see pixel_mask()).
 *
 * The field order is significant: the file initializes these structs with
 * positional initializers of the form { y, yt, u, ut, v, vt, use }.
 */
typedef struct {
  unsigned char y, yt;   /* luma center, luma tolerance */
  unsigned char u, ut;   /* U center, U tolerance */
  unsigned char v, vt;   /* V center, V tolerance */
  unsigned char use;     /* set to 1 by the initializers in this file */
} COLOR_SEG_ELEMENT;
|
||||
|
||||
/*
|
||||
COLOR_SEG_ELEMENT segmentation[]=
|
||||
{
|
||||
{ 60,4,80,17,80,10, 1},
|
||||
{ 40,4,15,10,80,10, 1},
|
||||
};
|
||||
*/
|
||||
|
||||
COLOR_SEG_ELEMENT segmentation[] = {
|
||||
{ 79, 44, 92, 44, 237, 60, 1},
|
||||
};
|
||||
|
||||
unsigned char pixel_mask(unsigned char y, unsigned char u, unsigned char v,
|
||||
COLOR_SEG_ELEMENT sgm[],
|
||||
int c) {
|
||||
COLOR_SEG_ELEMENT *s = sgm;
|
||||
unsigned char m = 0;
|
||||
int i;
|
||||
for (i = 0; i < c; i++, s++)
|
||||
m |= (abs(y - s->y) < s->yt &&
|
||||
abs(u - s->u) < s->ut &&
|
||||
abs(v - s->v) < s->vt ? 255 : 0);
|
||||
|
||||
return m;
|
||||
}
|
||||
/*
 * neighbors[i] lists the (up to 8) positions adjacent to position i of a
 * 16x16 block stored row-major in 256 bytes.  Unused slots hold i itself so
 * that OR-ing over all 8 slots is always safe.
 */
int neighbors[256][8];

/*
 * Fill the neighbors[] table.  Always returns 0.
 *
 * The center position is deliberately excluded: an interior pixel has
 * exactly 8 neighbours, which is the row capacity.  Including the center
 * (as an earlier version did) produced 9 entries, wrote past the end of the
 * row and silently dropped the bottom-right neighbour.  grow_ymask() ORs in
 * the center itself, so nothing is lost by leaving it out here.
 */
int makeneighbors(void) {
  int i;

  for (i = 0; i < 256; i++) {
    const int r = i >> 4;
    const int c = i & 15;
    int ni = 0;
    int dr, dc, k;

    /* Default every slot to the pixel itself (harmless filler). */
    for (k = 0; k < 8; k++)
      neighbors[i][k] = i;

    /* Visit the 3x3 window in ascending index order, clipped to the block. */
    for (dr = -1; dr <= 1; dr++) {
      const int nr = r + dr;
      if (nr < 0 || nr > 15)
        continue;
      for (dc = -1; dc <= 1; dc++) {
        const int nc = c + dc;
        if (nc < 0 || nc > 15)
          continue;
        if (dr == 0 && dc == 0)
          continue;  /* skip the center */
        neighbors[i][ni++] = (nr << 4) + nc;
      }
    }
  }
  return 0;
}
|
||||
void grow_ymask(unsigned char *ym) {
|
||||
unsigned char nym[256];
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
nym[i] = ym[i];
|
||||
for (j = 0; j < 8; j++) {
|
||||
nym[i] |= ym[neighbors[i][j]];
|
||||
}
|
||||
}
|
||||
for (i = 0; i < 256; i++)
|
||||
ym[i] = nym[i];
|
||||
}
|
||||
|
||||
void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
unsigned char *ym, unsigned char *uvm,
|
||||
int yp, int uvp,
|
||||
COLOR_SEG_ELEMENT sgm[],
|
||||
int count) {
|
||||
int r, c;
|
||||
unsigned char *oym = ym;
|
||||
|
||||
memset(ym, 20, 256);
|
||||
for (r = 0; r < 8; r++, uvm += 8, u += uvp, v += uvp, y += (yp + yp), ym += 32)
|
||||
for (c = 0; c < 8; c++) {
|
||||
int y1 = y[c << 1];
|
||||
int u1 = u[c];
|
||||
int v1 = v[c];
|
||||
int m = pixel_mask(y1, u1, v1, sgm, count);
|
||||
uvm[c] = m;
|
||||
ym[c << 1] = uvm[c]; // = pixel_mask(y[c<<1],u[c],v[c],sgm,count);
|
||||
ym[(c << 1) + 1] = pixel_mask(y[1 + (c << 1)], u[c], v[c], sgm, count);
|
||||
ym[(c << 1) + 16] = pixel_mask(y[yp + (c << 1)], u[c], v[c], sgm, count);
|
||||
ym[(c << 1) + 17] = pixel_mask(y[1 + yp + (c << 1)], u[c], v[c], sgm, count);
|
||||
}
|
||||
grow_ymask(oym);
|
||||
}
|
||||
|
||||
/*
 * Sum of absolute differences over a 16x16 block, counting only the pixels
 * whose mask byte is non-zero.
 *
 * src, p    first block and its stride.
 * dst, dp   second block and its stride.
 * ym        256-byte mask, 16 bytes per row.
 */
int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
               unsigned char *ym) {
  unsigned total = 0;
  int row, col;

  for (row = 0; row < 16; ++row) {
    const unsigned char *s = src + row * p;
    const unsigned char *d = dst + row * dp;
    const unsigned char *m = ym + row * 16;

    for (col = 0; col < 16; ++col) {
      if (m[col] == 0)
        continue;
      total += abs(s[col] - d[col]);
    }
  }

  return total;
}
|
||||
|
||||
/*
 * Count the positions at which two 256-byte (16x16) masks differ.
 * Both masks are contiguous, so the comparison is done as one flat pass.
 */
int compare_masks(unsigned char *sym, unsigned char *ym) {
  unsigned mismatches = 0;
  int k;

  for (k = 0; k < 16 * 16; ++k) {
    if (sym[k] != ym[k])
      ++mismatches;
  }

  return mismatches;
}
|
||||
|
||||
/*
 * Sum of absolute differences over a 16x16 block, counting only the pixels
 * whose mask byte is zero (the complement of masked_sad()).
 *
 * src, p    first block and its stride.
 * dst, dp   second block and its stride.
 * ym        256-byte mask, 16 bytes per row.
 */
int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
                 unsigned char *ym) {
  unsigned total = 0;
  int row, col;

  for (row = 0; row < 16; ++row) {
    const unsigned char *s = src + row * p;
    const unsigned char *d = dst + row * dp;
    const unsigned char *m = ym + row * 16;

    for (col = 0; col < 16; ++col) {
      if (m[col] != 0)
        continue;
      total += abs(s[col] - d[col]);
    }
  }

  return total;
}
|
||||
|
||||
int masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int yp, int uvp,
|
||||
unsigned char *dy, unsigned char *du, unsigned char *dv,
|
||||
int dyp, int duvp,
|
||||
COLOR_SEG_ELEMENT sgm[],
|
||||
int count,
|
||||
int *mi,
|
||||
int *mj,
|
||||
int *ui,
|
||||
int *uj,
|
||||
int *wm) {
|
||||
int i, j;
|
||||
|
||||
unsigned char ym[256];
|
||||
unsigned char uvm[64];
|
||||
unsigned char dym[256];
|
||||
unsigned char duvm[64];
|
||||
unsigned int e = 0;
|
||||
int beste = 256;
|
||||
int bmi = -32, bmj = -32;
|
||||
int bui = -32, buj = -32;
|
||||
int beste1 = 256;
|
||||
int bmi1 = -32, bmj1 = -32;
|
||||
int bui1 = -32, buj1 = -32;
|
||||
int obeste;
|
||||
|
||||
// first try finding best mask and then unmasked
|
||||
beste = 0xffffffff;
|
||||
|
||||
// find best unmasked mv
|
||||
for (i = -32; i < 32; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
unsigned char *duz = i / 2 * duvp + du;
|
||||
unsigned char *dvz = i / 2 * duvp + dv;
|
||||
for (j = -32; j < 32; j++) {
|
||||
// 0,0 masked destination
|
||||
make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count);
|
||||
|
||||
e = unmasked_sad(y, yp, dyz + j, dyp, dym);
|
||||
|
||||
if (e < beste) {
|
||||
bui = i;
|
||||
buj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
// bui=0;buj=0;
|
||||
// best mv masked destination
|
||||
make_mb_mask(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2,
|
||||
dym, duvm, dyp, duvp, sgm, count);
|
||||
|
||||
obeste = beste;
|
||||
beste = 0xffffffff;
|
||||
|
||||
// find best masked
|
||||
for (i = -32; i < 32; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
for (j = -32; j < 32; j++) {
|
||||
e = masked_sad(y, yp, dyz + j, dyp, dym);
|
||||
|
||||
if (e < beste) {
|
||||
bmi = i;
|
||||
bmj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
beste1 = beste + obeste;
|
||||
bmi1 = bmi;
|
||||
bmj1 = bmj;
|
||||
bui1 = bui;
|
||||
buj1 = buj;
|
||||
|
||||
beste = 0xffffffff;
|
||||
// source mask
|
||||
make_mb_mask(y, u, v, ym, uvm, yp, uvp, sgm, count);
|
||||
|
||||
// find best mask
|
||||
for (i = -32; i < 32; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
unsigned char *duz = i / 2 * duvp + du;
|
||||
unsigned char *dvz = i / 2 * duvp + dv;
|
||||
for (j = -32; j < 32; j++) {
|
||||
// 0,0 masked destination
|
||||
make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count);
|
||||
|
||||
e = compare_masks(ym, dym);
|
||||
|
||||
if (e < beste) {
|
||||
bmi = i;
|
||||
bmj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// best mv masked destination
|
||||
make_mb_mask(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2,
|
||||
dym, duvm, dyp, duvp, sgm, count);
|
||||
|
||||
obeste = masked_sad(y, yp, dy + bmi * dyp + bmj, dyp, dym);
|
||||
|
||||
beste = 0xffffffff;
|
||||
|
||||
// find best unmasked mv
|
||||
for (i = -32; i < 32; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
for (j = -32; j < 32; j++) {
|
||||
e = unmasked_sad(y, yp, dyz + j, dyp, dym);
|
||||
|
||||
if (e < beste) {
|
||||
bui = i;
|
||||
buj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
beste += obeste;
|
||||
|
||||
|
||||
if (beste < beste1) {
|
||||
*mi = bmi;
|
||||
*mj = bmj;
|
||||
*ui = bui;
|
||||
*uj = buj;
|
||||
*wm = 1;
|
||||
} else {
|
||||
*mi = bmi1;
|
||||
*mj = bmj1;
|
||||
*ui = bui1;
|
||||
*uj = buj1;
|
||||
*wm = 0;
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Blend two 16x16 blocks under a mask.
 *
 * src, p   block used where the mask byte is non-zero, and its stride.
 * dst, dp  block used where the mask byte is zero, and its stride.
 * ym       256-byte mask, 16 bytes per row.
 * prd      output: 256-byte prediction, 16 bytes per row.
 *
 * Always returns 0.
 */
int predict(unsigned char *src, int p, unsigned char *dst, int dp,
            unsigned char *ym, unsigned char *prd) {
  int row, col;

  for (row = 0; row < 16; ++row) {
    const unsigned char *from_set = src + row * p;
    const unsigned char *from_clear = dst + row * dp;
    const unsigned char *m = ym + row * 16;
    unsigned char *out = prd + row * 16;

    for (col = 0; col < 16; ++col) {
      if (m[col])
        out[col] = from_set[col];
      else
        out[col] = from_clear[col];
    }
  }
  return 0;
}
|
||||
|
||||
int fast_masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int yp, int uvp,
|
||||
unsigned char *dy, unsigned char *du, unsigned char *dv,
|
||||
int dyp, int duvp,
|
||||
COLOR_SEG_ELEMENT sgm[],
|
||||
int count,
|
||||
int *mi,
|
||||
int *mj,
|
||||
int *ui,
|
||||
int *uj,
|
||||
int *wm) {
|
||||
int i, j;
|
||||
|
||||
unsigned char ym[256];
|
||||
unsigned char ym2[256];
|
||||
unsigned char uvm[64];
|
||||
unsigned char dym2[256];
|
||||
unsigned char dym[256];
|
||||
unsigned char duvm[64];
|
||||
unsigned int e = 0;
|
||||
int beste = 256;
|
||||
int bmi = -32, bmj = -32;
|
||||
int bui = -32, buj = -32;
|
||||
int beste1 = 256;
|
||||
int bmi1 = -32, bmj1 = -32;
|
||||
int bui1 = -32, buj1 = -32;
|
||||
int obeste;
|
||||
|
||||
// first try finding best mask and then unmasked
|
||||
beste = 0xffffffff;
|
||||
|
||||
#if 0
|
||||
for (i = 0; i < 16; i++) {
|
||||
unsigned char *dy = i * yp + y;
|
||||
for (j = 0; j < 16; j++)
|
||||
printf("%2x", dy[j]);
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
for (i = -32; i < 48; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
for (j = -32; j < 48; j++)
|
||||
printf("%2x", dyz[j]);
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
// find best unmasked mv
|
||||
for (i = -32; i < 32; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
unsigned char *duz = i / 2 * duvp + du;
|
||||
unsigned char *dvz = i / 2 * duvp + dv;
|
||||
for (j = -32; j < 32; j++) {
|
||||
// 0,0 masked destination
|
||||
vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp,
|
||||
sgm[0].y, sgm[0].u, sgm[0].v,
|
||||
sgm[0].yt, sgm[0].ut, sgm[0].vt);
|
||||
|
||||
vp8_growmaskmb_sse3(dym, dym2);
|
||||
|
||||
e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2);
|
||||
|
||||
if (e < beste) {
|
||||
bui = i;
|
||||
buj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
// bui=0;buj=0;
|
||||
// best mv masked destination
|
||||
|
||||
vp8_makemask_sse3(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2,
|
||||
dym, dyp, duvp,
|
||||
sgm[0].y, sgm[0].u, sgm[0].v,
|
||||
sgm[0].yt, sgm[0].ut, sgm[0].vt);
|
||||
|
||||
vp8_growmaskmb_sse3(dym, dym2);
|
||||
|
||||
obeste = beste;
|
||||
beste = 0xffffffff;
|
||||
|
||||
// find best masked
|
||||
for (i = -32; i < 32; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
for (j = -32; j < 32; j++) {
|
||||
e = vp9_sad16x16_masked_wmt(y, yp, dyz + j, dyp, dym2);
|
||||
if (e < beste) {
|
||||
bmi = i;
|
||||
bmj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
beste1 = beste + obeste;
|
||||
bmi1 = bmi;
|
||||
bmj1 = bmj;
|
||||
bui1 = bui;
|
||||
buj1 = buj;
|
||||
|
||||
// source mask
|
||||
vp8_makemask_sse3(y, u, v,
|
||||
ym, yp, uvp,
|
||||
sgm[0].y, sgm[0].u, sgm[0].v,
|
||||
sgm[0].yt, sgm[0].ut, sgm[0].vt);
|
||||
|
||||
vp8_growmaskmb_sse3(ym, ym2);
|
||||
|
||||
// find best mask
|
||||
for (i = -32; i < 32; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
unsigned char *duz = i / 2 * duvp + du;
|
||||
unsigned char *dvz = i / 2 * duvp + dv;
|
||||
for (j = -32; j < 32; j++) {
|
||||
// 0,0 masked destination
|
||||
vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp,
|
||||
sgm[0].y, sgm[0].u, sgm[0].v,
|
||||
sgm[0].yt, sgm[0].ut, sgm[0].vt);
|
||||
|
||||
vp8_growmaskmb_sse3(dym, dym2);
|
||||
|
||||
e = compare_masks(ym2, dym2);
|
||||
|
||||
if (e < beste) {
|
||||
bmi = i;
|
||||
bmj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vp8_makemask_sse3(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2,
|
||||
dym, dyp, duvp,
|
||||
sgm[0].y, sgm[0].u, sgm[0].v,
|
||||
sgm[0].yt, sgm[0].ut, sgm[0].vt);
|
||||
|
||||
vp8_growmaskmb_sse3(dym, dym2);
|
||||
|
||||
obeste = vp9_sad16x16_masked_wmt(y, yp, dy + bmi * dyp + bmj, dyp, dym2);
|
||||
|
||||
beste = 0xffffffff;
|
||||
|
||||
// find best unmasked mv
|
||||
for (i = -32; i < 32; i++) {
|
||||
unsigned char *dyz = i * dyp + dy;
|
||||
for (j = -32; j < 32; j++) {
|
||||
e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2);
|
||||
|
||||
if (e < beste) {
|
||||
bui = i;
|
||||
buj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
beste += obeste;
|
||||
|
||||
if (beste < beste1) {
|
||||
*mi = bmi;
|
||||
*mj = bmj;
|
||||
*ui = bui;
|
||||
*uj = buj;
|
||||
*wm = 1;
|
||||
} else {
|
||||
*mi = bmi1;
|
||||
*mj = bmj1;
|
||||
*ui = bui1;
|
||||
*uj = buj1;
|
||||
*wm = 0;
|
||||
beste = beste1;
|
||||
|
||||
}
|
||||
return beste;
|
||||
}
|
||||
|
||||
int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm,
|
||||
int ymp, int uvmp,
|
||||
unsigned char *yp, unsigned char *up, unsigned char *vp,
|
||||
int ypp, int uvpp,
|
||||
COLOR_SEG_ELEMENT sgm[],
|
||||
int count,
|
||||
int mi,
|
||||
int mj,
|
||||
int ui,
|
||||
int uj,
|
||||
int wm) {
|
||||
int i, j;
|
||||
unsigned char dym[256];
|
||||
unsigned char dym2[256];
|
||||
unsigned char duvm[64];
|
||||
unsigned char *yu = ym, *uu = um, *vu = vm;
|
||||
|
||||
unsigned char *dym3 = dym2;
|
||||
|
||||
ym += mi * ymp + mj;
|
||||
um += mi / 2 * uvmp + mj / 2;
|
||||
vm += mi / 2 * uvmp + mj / 2;
|
||||
|
||||
yu += ui * ymp + uj;
|
||||
uu += ui / 2 * uvmp + uj / 2;
|
||||
vu += ui / 2 * uvmp + uj / 2;
|
||||
|
||||
// best mv masked destination
|
||||
if (wm)
|
||||
vp8_makemask_sse3(ym, um, vm, dym, ymp, uvmp,
|
||||
sgm[0].y, sgm[0].u, sgm[0].v,
|
||||
sgm[0].yt, sgm[0].ut, sgm[0].vt);
|
||||
else
|
||||
vp8_makemask_sse3(yu, uu, vu, dym, ymp, uvmp,
|
||||
sgm[0].y, sgm[0].u, sgm[0].v,
|
||||
sgm[0].yt, sgm[0].ut, sgm[0].vt);
|
||||
|
||||
vp8_growmaskmb_sse3(dym, dym2);
|
||||
vp8_masked_predictor_wmt(ym, yu, ymp, yp, ypp, dym3);
|
||||
vp8_uv_from_y_mask(dym3, duvm);
|
||||
vp8_masked_predictor_uv_wmt(um, uu, uvmp, up, uvpp, duvm);
|
||||
vp8_masked_predictor_uv_wmt(vm, vu, uvmp, vp, uvpp, duvm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned char f0p[1280 * 720 * 3 / 2];
|
||||
unsigned char f1p[1280 * 720 * 3 / 2];
|
||||
unsigned char prd[1280 * 720 * 3 / 2];
|
||||
unsigned char msk[1280 * 720 * 3 / 2];
|
||||
|
||||
|
||||
int mainz(int argc, char *argv[]) {
|
||||
|
||||
FILE *f = fopen(argv[1], "rb");
|
||||
FILE *g = fopen(argv[2], "wb");
|
||||
int w = atoi(argv[3]), h = atoi(argv[4]);
|
||||
int y_stride = w, uv_stride = w / 2;
|
||||
int r, c;
|
||||
unsigned char *f0 = f0p, *f1 = f1p, *t;
|
||||
unsigned char ym[256], uvm[64];
|
||||
unsigned char ym2[256], uvm2[64];
|
||||
unsigned char ym3[256], uvm3[64];
|
||||
int a, b;
|
||||
|
||||
COLOR_SEG_ELEMENT last = { 20, 20, 20, 20, 230, 20, 1}, best;
|
||||
#if 0
|
||||
makeneighbors();
|
||||
COLOR_SEG_ELEMENT segmentation[] = {
|
||||
{ 60, 4, 80, 17, 80, 10, 1},
|
||||
{ 40, 4, 15, 10, 80, 10, 1},
|
||||
};
|
||||
make_mb_mask(y, u, v, ym2, uvm2, 16, 8, segmentation, 1);
|
||||
|
||||
vp8_makemask_sse3(y, u, v, ym, (int) 16, (int) 8,
|
||||
(int) segmentation[0].y, (int) segmentation[0].u, (int) segmentation[0].v,
|
||||
segmentation[0].yt, segmentation[0].ut, segmentation[0].vt);
|
||||
|
||||
vp8_growmaskmb_sse3(ym, ym3);
|
||||
|
||||
a = vp9_sad16x16_masked_wmt(str, 16, sts, 16, ym3);
|
||||
b = vp9_sad16x16_unmasked_wmt(str, 16, sts, 16, ym3);
|
||||
|
||||
vp8_masked_predictor_wmt(str, sts, 16, ym, 16, ym3);
|
||||
|
||||
vp8_uv_from_y_mask(ym3, uvm3);
|
||||
|
||||
return 4;
|
||||
#endif
|
||||
makeneighbors();
|
||||
|
||||
|
||||
memset(prd, 128, w * h * 3 / 2);
|
||||
|
||||
fread(f0, w * h * 3 / 2, 1, f);
|
||||
|
||||
while (!feof(f)) {
|
||||
unsigned char *ys = f1, *yd = f0, *yp = prd;
|
||||
unsigned char *us = f1 + w * h, *ud = f0 + w * h, *up = prd + w * h;
|
||||
unsigned char *vs = f1 + w * h * 5 / 4, *vd = f0 + w * h * 5 / 4, *vp = prd + w * h * 5 / 4;
|
||||
fread(f1, w * h * 3 / 2, 1, f);
|
||||
|
||||
ys += 32 * y_stride;
|
||||
yd += 32 * y_stride;
|
||||
yp += 32 * y_stride;
|
||||
us += 16 * uv_stride;
|
||||
ud += 16 * uv_stride;
|
||||
up += 16 * uv_stride;
|
||||
vs += 16 * uv_stride;
|
||||
vd += 16 * uv_stride;
|
||||
vp += 16 * uv_stride;
|
||||
for (r = 32; r < h - 32; r += 16,
|
||||
ys += 16 * w, yd += 16 * w, yp += 16 * w,
|
||||
us += 8 * uv_stride, ud += 8 * uv_stride, up += 8 * uv_stride,
|
||||
vs += 8 * uv_stride, vd += 8 * uv_stride, vp += 8 * uv_stride) {
|
||||
for (c = 32; c < w - 32; c += 16) {
|
||||
int mi, mj, ui, uj, wm;
|
||||
int bmi, bmj, bui, buj, bwm;
|
||||
unsigned char ym[256];
|
||||
|
||||
if (vp9_sad16x16_sse3(ys + c, y_stride, yd + c, y_stride, 0xffff) == 0)
|
||||
bmi = bmj = bui = buj = bwm = 0;
|
||||
else {
|
||||
COLOR_SEG_ELEMENT cs[5];
|
||||
int j;
|
||||
unsigned int beste = 0xfffffff;
|
||||
unsigned int bestj = 0;
|
||||
|
||||
// try color from last mb segmentation
|
||||
cs[0] = last;
|
||||
|
||||
// try color segs from 4 pixels in mb recon as segmentation
|
||||
cs[1].y = yd[c + y_stride + 1];
|
||||
cs[1].u = ud[c / 2 + uv_stride];
|
||||
cs[1].v = vd[c / 2 + uv_stride];
|
||||
cs[1].yt = cs[1].ut = cs[1].vt = 20;
|
||||
cs[2].y = yd[c + w + 14];
|
||||
cs[2].u = ud[c / 2 + uv_stride + 7];
|
||||
cs[2].v = vd[c / 2 + uv_stride + 7];
|
||||
cs[2].yt = cs[2].ut = cs[2].vt = 20;
|
||||
cs[3].y = yd[c + w * 14 + 1];
|
||||
cs[3].u = ud[c / 2 + uv_stride * 7];
|
||||
cs[3].v = vd[c / 2 + uv_stride * 7];
|
||||
cs[3].yt = cs[3].ut = cs[3].vt = 20;
|
||||
cs[4].y = yd[c + w * 14 + 14];
|
||||
cs[4].u = ud[c / 2 + uv_stride * 7 + 7];
|
||||
cs[4].v = vd[c / 2 + uv_stride * 7 + 7];
|
||||
cs[4].yt = cs[4].ut = cs[4].vt = 20;
|
||||
|
||||
for (j = 0; j < 5; j++) {
|
||||
int e;
|
||||
|
||||
e = fast_masked_motion_search(
|
||||
ys + c, us + c / 2, vs + c / 2, y_stride, uv_stride,
|
||||
yd + c, ud + c / 2, vd + c / 2, y_stride, uv_stride,
|
||||
&cs[j], 1, &mi, &mj, &ui, &uj, &wm);
|
||||
|
||||
if (e < beste) {
|
||||
bmi = mi;
|
||||
bmj = mj;
|
||||
bui = ui;
|
||||
buj = uj, bwm = wm;
|
||||
bestj = j;
|
||||
beste = e;
|
||||
}
|
||||
}
|
||||
best = cs[bestj];
|
||||
// best = segmentation[0];
|
||||
last = best;
|
||||
}
|
||||
predict_all(yd + c, ud + c / 2, vd + c / 2, w, uv_stride,
|
||||
yp + c, up + c / 2, vp + c / 2, w, uv_stride,
|
||||
&best, 1, bmi, bmj, bui, buj, bwm);
|
||||
|
||||
}
|
||||
}
|
||||
fwrite(prd, w * h * 3 / 2, 1, g);
|
||||
t = f0;
|
||||
f0 = f1;
|
||||
f1 = t;
|
||||
|
||||
}
|
||||
fclose(f);
|
||||
fclose(g);
|
||||
return 0;
|
||||
}
|
@ -1,484 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_makemask_sse3(
;    unsigned char *y,      arg(0)   16x16 luma block
;    unsigned char *u,      arg(1)   8x8 U block
;    unsigned char *v,      arg(2)   8x8 V block
;    unsigned char *ym,     arg(3)   output: 256-byte luma mask
;    int yp,                arg(4)   luma stride
;    int uvp,               arg(5)   chroma stride
;    int ys,                arg(6)   luma center value
;    int us,                arg(7)   U center value
;    int vs,                arg(8)   V center value
;    int yt,                arg(9)   luma tolerance
;    int ut,                arg(10)  U tolerance
;    int vt)                arg(11)  V tolerance
;
; For every luma pixel writes 0xff when |y-ys| < yt, |u-us| < ut and
; |v-vs| < vt (u/v taken from the co-located chroma sample), else 0x00.
; All comparisons are unsigned, matching the C reference pixel_mask().
;
; x86-64 only (uses r8 and xmm8-xmm12).  Despite the name, pshufb requires
; SSSE3.
; NOTE(review): xmm6-xmm12 are callee-saved under the Microsoft x64 ABI and
; are not preserved here -- confirm whether Win64 builds need to save them.
; NOTE(review): SHADOW_ARGS_TO_STACK is given 14 although 12 arguments are
; read; kept as in the original -- confirm against the macro definition.
global sym(vp8_makemask_sse3) PRIVATE
sym(vp8_makemask_sse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 14
    push        rsi
    push        rdi
    push        rbx                     ; callee-saved; holds yp below
    ; end prolog

    mov         rsi, arg(0)             ;y
    mov         rdi, arg(1)             ;u
    mov         rcx, arg(2)             ;v
    mov         rax, arg(3)             ;ym
    movsxd      rbx, dword arg(4)       ;yp
    movsxd      rdx, dword arg(5)       ;uvp

    pxor        xmm0, xmm0              ; all-zero shuffle control: broadcast byte 0
    pxor        xmm12, xmm12            ; constant zero for the compares in the loop

    ;make 16 copies of the center y value
    movd        xmm1, arg(6)
    pshufb      xmm1, xmm0

    ; make 16 copies of the center u value
    movd        xmm2, arg(7)
    pshufb      xmm2, xmm0

    ; make 16 copies of the center v value
    movd        xmm3, arg(8)
    pshufb      xmm3, xmm0
    punpcklqdq  xmm2, xmm3              ; xmm2 = 8 x us | 8 x vs

    ;make 16 copies of the y tolerance
    movd        xmm3, arg(9)
    pshufb      xmm3, xmm0

    ;make 16 copies of the u tolerance
    movd        xmm4, arg(10)
    pshufb      xmm4, xmm0

    ;make 16 copies of the v tolerance
    movd        xmm5, arg(11)
    pshufb      xmm5, xmm0
    punpckhqdq  xmm4, xmm5              ; xmm4 = 8 x ut | 8 x vt

    mov         r8, 8                   ; 8 pairs of luma rows

NextPairOfRows:

    ;---- luma row 0: xmm6 = 0xff where the pixel is OUT of tolerance ----
    movdqu      xmm0, [rsi]

    movdqa      xmm6, xmm1
    movdqa      xmm7, xmm0
    psubusb     xmm0, xmm1
    psubusb     xmm6, xmm7
    por         xmm0, xmm6              ; xmm0 = |y - ys|

    movdqa      xmm6, xmm3
    psubusb     xmm6, xmm0              ; non-zero iff yt > |y - ys| (unsigned)
    pcmpeqb     xmm6, xmm12             ; 0xff iff yt <= |y - ys|

    ;---- luma row 1: xmm11 = 0xff where the pixel is OUT of tolerance ----
    add         rsi, rbx
    movdqu      xmm0, [rsi]

    movdqa      xmm11, xmm1
    movdqa      xmm7, xmm0
    psubusb     xmm0, xmm1
    psubusb     xmm11, xmm7
    por         xmm0, xmm11

    movdqa      xmm11, xmm3
    psubusb     xmm11, xmm0
    pcmpeqb     xmm11, xmm12

    ;---- chroma: load only the 8 bytes of each plane that are used ----
    movq        xmm7, [rdi]
    movq        xmm8, [rcx]
    punpcklqdq  xmm7, xmm8              ; xmm7 = 8 x u | 8 x v

    ;compute abs difference between source and uv targets
    movdqa      xmm9, xmm2
    movdqa      xmm10, xmm7
    psubusb     xmm7, xmm2
    psubusb     xmm9, xmm10
    por         xmm7, xmm9

    ;0xff where the chroma sample is OUT of tolerance
    movdqa      xmm0, xmm4
    psubusb     xmm0, xmm7
    pcmpeqb     xmm0, xmm12

    ;double u and v reject masks to luma width and combine them
    movdqa      xmm8, xmm0
    punpckhbw   xmm0, xmm0              ; v reject, each byte twice
    punpcklbw   xmm8, xmm8              ; u reject, each byte twice
    por         xmm8, xmm0

    ;row 0: accepted iff neither luma nor chroma rejected
    por         xmm6, xmm8
    pcmpeqb     xmm6, xmm12
    movdqu      [rax], xmm6             ; ym is not guaranteed 16-byte aligned

    ;row 1
    por         xmm11, xmm8
    pcmpeqb     xmm11, xmm12
    movdqu      [rax+16], xmm11

    ; to the next row or set of rows
    add         rsi, rbx
    add         rdi, rdx
    add         rcx, rdx
    add         rax, 32
    dec         r8
    jnz         NextPairOfRows

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
;GROW_HORIZ (register for result, source register or mem local)
; takes source and shifts left and ors with source
; then shifts right and ors with source
; The load is unaligned-safe because the source may be a caller buffer.
; Clobbers xmm14 and xmm15.
%macro GROW_HORIZ 2
    movdqu      %1, %2
    movdqa      xmm14, %1
    movdqa      xmm15, %1
    pslldq      xmm14, 1
    psrldq      xmm15, 1
    por         %1, xmm14
    por         %1, xmm15
%endmacro

;GROW_VERT (result, center row, above row, below row)
; result = center | above | below
%macro GROW_VERT 4
    movdqa      %1, %2
    por         %1, %3
    por         %1, %4
%endmacro

;GROW_NEXTLINE (new line to grow, new source, line to write)
; Horizontally grows the new source row into %1, then writes the OR of the
; three rows currently held in xmm0, xmm1 and xmm2 to %3.  Clobbers xmm3.
%macro GROW_NEXTLINE 3
    GROW_HORIZ  %1, %2
    GROW_VERT   xmm3, xmm0, xmm1, xmm2
    movdqu      %3, xmm3
%endmacro


;int vp8_growmaskmb_sse3(
;    unsigned char *om,     arg(0)   input: 256-byte (16x16) mask
;    unsigned char *nm)     arg(1)   output: 256-byte dilated mask
;
; Each output byte is the OR of the 3x3 neighbourhood of the input byte,
; clipped at the block edges.  xmm0-xmm2 hold a sliding window of three
; horizontally grown rows.
;
; The C prototype declares an int return, but rax is not set here; callers
; in this file ignore the result.
; Neither buffer needs to be 16-byte aligned (unaligned loads and stores).
; x86-64 only (uses xmm14/xmm15).
; NOTE(review): xmm14/xmm15 are callee-saved under the Microsoft x64 ABI
; and are not preserved here -- confirm whether Win64 builds need that.
global sym(vp8_growmaskmb_sse3) PRIVATE
sym(vp8_growmaskmb_sse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)             ;src
    mov         rdi, arg(1)             ;dst

    GROW_HORIZ  xmm0, [rsi]
    GROW_HORIZ  xmm1, [rsi+16]
    GROW_HORIZ  xmm2, [rsi+32]

    GROW_VERT   xmm3, xmm0, xmm1, xmm2  ; output row 1 = rows 0|1|2
    por         xmm0, xmm1              ; output row 0 = rows 0|1 (top edge)
    movdqu      [rdi], xmm0
    movdqu      [rdi+16], xmm3

    GROW_NEXTLINE xmm0, [rsi+48],  [rdi+32]
    GROW_NEXTLINE xmm1, [rsi+64],  [rdi+48]
    GROW_NEXTLINE xmm2, [rsi+80],  [rdi+64]
    GROW_NEXTLINE xmm0, [rsi+96],  [rdi+80]
    GROW_NEXTLINE xmm1, [rsi+112], [rdi+96]
    GROW_NEXTLINE xmm2, [rsi+128], [rdi+112]
    GROW_NEXTLINE xmm0, [rsi+144], [rdi+128]
    GROW_NEXTLINE xmm1, [rsi+160], [rdi+144]
    GROW_NEXTLINE xmm2, [rsi+176], [rdi+160]
    GROW_NEXTLINE xmm0, [rsi+192], [rdi+176]
    GROW_NEXTLINE xmm1, [rsi+208], [rdi+192]
    GROW_NEXTLINE xmm2, [rsi+224], [rdi+208]
    GROW_NEXTLINE xmm0, [rsi+240], [rdi+224]

    por         xmm0, xmm2              ; output row 15 = rows 14|15 (bottom edge)
    movdqu      [rdi+240], xmm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
|
||||
;unsigned int vp8_sad16x16_masked_wmt(
;    unsigned char *src_ptr,    arg(0)
;    int  src_stride,           arg(1)
;    unsigned char *ref_ptr,    arg(2)
;    int  ref_stride,           arg(3)
;    unsigned char *mask)       arg(4)   256 bytes, 16 per row
;
; Returns in rax the 16x16 sum of absolute differences between src and ref.
; Both rows are ANDed with the mask row first, so a mask byte of 0x00
; removes that pixel from the sum and 0xff keeps it.
;
; NOTE(review): the C code in this change declares and calls
; vp9_sad16x16_masked_wmt, while this symbol is named vp8_... -- confirm
; which name is intended; the exported name is left unchanged here.
global sym(vp8_sad16x16_masked_wmt) PRIVATE
sym(vp8_sad16x16_masked_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    push        rbx                     ; callee-saved; holds the mask pointer
    ; end prolog
    mov         rsi, arg(0)             ;src_ptr
    mov         rdi, arg(2)             ;ref_ptr

    mov         rbx, arg(4)             ;mask
    movsxd      rax, dword ptr arg(1)   ;src_stride
    movsxd      rdx, dword ptr arg(3)   ;ref_stride

    mov         rcx, 16                 ; rows remaining

    pxor        xmm3, xmm3              ; running sums, one per 64-bit half

NextSadRow:
    movdqu      xmm0, [rsi]
    movdqu      xmm1, [rdi]
    movdqu      xmm2, [rbx]
    pand        xmm0, xmm2              ; excluded pixels become 0 in both
    pand        xmm1, xmm2              ; rows, so they contribute nothing

    psadbw      xmm0, xmm1
    paddw       xmm3, xmm0              ; max 16*8*255 per half fits in 16 bits

    add         rsi, rax
    add         rdi, rdx
    add         rbx, 16

    dec         rcx
    jnz         NextSadRow

    ; fold the high half into the low half and return it
    movdqa      xmm4, xmm3
    psrldq      xmm4, 8
    paddw       xmm3, xmm4
    movq        rax, xmm3
    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
;unsigned int vp8_sad16x16_unmasked_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    unsigned char *mask)
;
; Sum of absolute differences over a 16x16 block, restricted to the area NOT
; covered by the mask: both the source row and the reference row are ORed
; with the matching mask row, so every bit set in the mask is forced to 1 in
; both operands and cancels out of the difference.  mask is read as sixteen
; consecutive 16-byte rows (fixed stride of 16).
;
; Returns the SAD in rax.
; Register roles: rsi = src, rdi = ref, rbx = mask, rax/rdx = strides,
;                 rcx = rows remaining, xmm3 = running sum.
global sym(vp8_sad16x16_unmasked_wmt) PRIVATE
sym(vp8_sad16x16_unmasked_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    push        rbx                 ; callee-saved; used below as mask pointer
    ; end prolog

    mov         rsi, arg(0)         ;src_ptr
    mov         rdi, arg(2)         ;ref_ptr
    mov         rbx, arg(4)         ;mask
    movsxd      rax, dword ptr arg(1) ;src_stride
    movsxd      rdx, dword ptr arg(3) ;ref_stride

    mov         rcx, 16             ; rows to process
    pxor        xmm3, xmm3          ; accumulator = 0

.next_row:
    movdqu      xmm0, [rsi]
    movdqu      xmm1, [rdi]
    movdqu      xmm2, [rbx]
    por         xmm0, xmm2          ; saturate masked bits in both operands
    por         xmm1, xmm2

    psadbw      xmm0, xmm1          ; two partial sums, one per 8-byte half
    paddw       xmm3, xmm0          ; 16 rows * 8 bytes * 255 fits in a word

    add         rsi, rax
    add         rdi, rdx
    add         rbx, 16

    dec         rcx
    jnz         .next_row

    ; fold the high-half partial sum into the low half
    movdqa      xmm4, xmm3
    psrldq      xmm4, 8
    paddw       xmm3, xmm4
    movq        rax, xmm3

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
;unsigned int vp8_masked_predictor_wmt(
;    unsigned char *masked,
;    unsigned char *unmasked,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    unsigned char *mask)
;
; Blends two 16x16 predictors into dst_ptr.  For every bit:
;     dst = (masked & mask) | (unmasked & ~mask)
; Both source blocks are stepped by src_stride, the destination by
; dst_stride, and mask is read as sixteen consecutive 16-byte rows.
;
; NOTE(review): declared as returning unsigned int, but no result is
; computed; rax merely holds src_stride on exit.
;
; Register roles: rsi = masked, rdi = unmasked, rbx = mask, r11 = dst,
;                 rax = src_stride, rdx = dst_stride, rcx = rows remaining.
global sym(vp8_masked_predictor_wmt) PRIVATE
sym(vp8_masked_predictor_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    push        rbx                 ; callee-saved; used below as mask pointer
    ; end prolog

    mov         rsi, arg(0)         ;masked
    mov         rdi, arg(1)         ;unmasked
    mov         rbx, arg(5)         ;mask
    movsxd      rax, dword ptr arg(2) ;src_stride
    mov         r11, arg(3)         ;dst_ptr
    movsxd      rdx, dword ptr arg(4) ;dst_stride

    mov         rcx, 16             ; rows to process

.next_row:
    movdqu      xmm0, [rsi]
    movdqu      xmm1, [rdi]
    movdqu      xmm2, [rbx]

    pand        xmm0, xmm2          ; masked   &  mask
    pandn       xmm2, xmm1          ; unmasked & ~mask
    por         xmm0, xmm2
    movdqu      [r11], xmm0

    add         r11, rdx
    add         rsi, rax
    add         rdi, rax            ; unmasked is a source: step by src_stride
    add         rbx, 16

    dec         rcx
    jnz         .next_row

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
;unsigned int vp8_masked_predictor_uv_wmt(
;    unsigned char *masked,
;    unsigned char *unmasked,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    unsigned char *mask)
;
; 8x8 variant of the masked predictor blend.  For every bit:
;     dst = (masked & mask) | (unmasked & ~mask)
; Eight rows of eight bytes are processed.  Both source blocks are stepped by
; src_stride, the destination by dst_stride, and mask is read as eight
; consecutive 8-byte rows.
;
; NOTE(review): declared as returning unsigned int, but no result is
; computed; rax merely holds src_stride on exit.
;
; Register roles: rsi = masked, rdi = unmasked, rbx = mask, r11 = dst,
;                 rax = src_stride, rdx = dst_stride, rcx = rows remaining.
global sym(vp8_masked_predictor_uv_wmt) PRIVATE
sym(vp8_masked_predictor_uv_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    push        rbx                 ; callee-saved; used below as mask pointer
    ; end prolog

    mov         rsi, arg(0)         ;masked
    mov         rdi, arg(1)         ;unmasked
    mov         rbx, arg(5)         ;mask
    movsxd      rax, dword ptr arg(2) ;src_stride
    mov         r11, arg(3)         ;dst_ptr
    movsxd      rdx, dword ptr arg(4) ;dst_stride

    mov         rcx, 8              ; rows to process

.next_row:
    movq        xmm0, [rsi]
    movq        xmm1, [rdi]
    movq        xmm2, [rbx]

    pand        xmm0, xmm2          ; masked   &  mask
    pandn       xmm2, xmm1          ; unmasked & ~mask
    por         xmm0, xmm2
    movq        [r11], xmm0

    add         r11, rdx
    add         rsi, rax
    add         rdi, rax
    add         rbx, 8

    dec         rcx
    jnz         .next_row

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
;unsigned int vp8_uv_from_y_mask(
;    unsigned char *ymask,
;    unsigned char *uvmask)
;
; Subsamples a 16x16 byte mask to 8x8: every second 16-byte row of ymask is
; read (source step of 32 bytes), its even-indexed bytes are gathered with
; pshufb using the shuf1b table, and the resulting 8 bytes are appended to
; uvmask.  Eight output rows are produced.
;
; pshufb is an SSSE3 instruction.
;
; NOTE(review): declared as returning unsigned int, but rax is never
; written, so the returned value is undefined.
;
; Register roles: rsi = ymask, rdi = uvmask, rcx = rows remaining,
;                 xmm1 = shuffle control.
global sym(vp8_uv_from_y_mask) PRIVATE
sym(vp8_uv_from_y_mask):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)         ;ymask
    mov         rdi, arg(1)         ;uvmask

    mov         rcx, 8              ; output rows to produce
    movdqa      xmm1, [GLOBAL(shuf1b)] ; loop-invariant shuffle control

.next_row:
    movdqu      xmm0, [rsi]
    pshufb      xmm0, xmm1          ; even bytes -> low 8 bytes
    movq        [rdi], xmm0

    add         rdi, 8
    add         rsi, 32             ; skip the odd source row

    dec         rcx
    jnz         .next_row

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
SECTION_RODATA
align 16
; pshufb control vector: the low eight lanes pick source bytes 0,2,...,14
; (the even-indexed bytes); the high eight lanes all pick source byte 0.
shuf1b:
    db 0, 2, 4, 6, 8, 10, 12, 14
    times 8 db 0
|
||||
|
@ -87,12 +87,6 @@ VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
|
||||
endif
|
||||
|
||||
# common (c)
|
||||
ifeq ($(CONFIG_CSM),yes)
|
||||
VP9_COMMON_SRCS-yes += common/vp9_maskingmv.c
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm
|
||||
endif
|
||||
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_sse2.c
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user