Patch by Dzonatas and Callum Lerwick. Fp/vectorization patch which basically converts most of the irreversible decode codepath to floating point, eliminating a few rounds of int/fp conversion, resulting in a vast performance improvement, and an increase in accuracy.

This commit is contained in:
Francois-Olivier Devaux
2007-11-13 17:35:12 +00:00
parent 014694b04f
commit dbeebe72b9
9 changed files with 624 additions and 382 deletions

View File

@@ -802,24 +802,22 @@ static void t1_encode_cblk(
int numcomps,
opj_tcd_tile_t * tile)
{
int i, j;
int passno;
int bpno, passtype;
int max;
int nmsedec = 0;
double cumwmsedec = 0.0;
opj_mqc_t *mqc = t1->mqc; /* MQC component */
int passno, bpno, passtype;
int nmsedec = 0;
int i, max;
char type = T1_TYPE_MQ;
double tempwmsedec;
opj_mqc_t *mqc = t1->mqc; /* MQC component */
max = 0;
for (j = 0; j < t1->h; ++j) {
for (i = 0; i < t1->w; ++i) {
max = int_max(max, int_abs(t1->data[(j * t1->w) + i]));
}
for (i = 0; i < t1->w * t1->h; ++i) {
int tmp = abs(t1->data[i]);
max = int_max(max, tmp);
}
cblk->numbps = max ? (int_floorlog2(max) + 1) - T1_NMSEDEC_FRACBITS : 0;
bpno = cblk->numbps - 1;
@@ -932,12 +930,12 @@ static void t1_decode_cblk(
int roishift,
int cblksty)
{
opj_raw_t *raw = t1->raw; /* RAW component */
opj_mqc_t *mqc = t1->mqc; /* MQC component */
int bpno, passtype;
int segno, passno;
char type = T1_TYPE_MQ; /* BYPASS mode */
opj_raw_t *raw = t1->raw; /* RAW component */
opj_mqc_t *mqc = t1->mqc; /* MQC component */
if(!allocate_buffers(
t1,
@@ -1034,23 +1032,29 @@ void t1_encode_cblks(
tile->distotile = 0; /* fixed_quality */
for (compno = 0; compno < tile->numcomps; ++compno) {
opj_tcd_tilecomp_t *tilec = &tile->comps[compno];
opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
opj_tccp_t* tccp = &tcp->tccps[compno];
int tile_w = tilec->x1 - tilec->x0;
for (resno = 0; resno < tilec->numresolutions; ++resno) {
opj_tcd_resolution_t *res = &tilec->resolutions[resno];
for (bandno = 0; bandno < res->numbands; ++bandno) {
opj_tcd_band_t *band = &res->bands[bandno];
opj_tcd_band_t* restrict band = &res->bands[bandno];
for (precno = 0; precno < res->pw * res->ph; ++precno) {
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
int x, y, w, i, j;
opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
int* restrict datap;
int* restrict tiledp;
int cblk_w;
int cblk_h;
int i, j;
x = cblk->x0 - band->x0;
y = cblk->y0 - band->y0;
int x = cblk->x0 - band->x0;
int y = cblk->y0 - band->y0;
if (band->bandno & 1) {
opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
x += pres->x1 - pres->x0;
@@ -1068,20 +1072,25 @@ void t1_encode_cblks(
return;
}
w = tilec->x1 - tilec->x0;
if (tcp->tccps[compno].qmfbid == 1) {
for (j = 0; j < t1->h; ++j) {
for (i = 0; i < t1->w; ++i) {
t1->data[(j * t1->w) + i] =
tilec->data[(x + i) + (y + j) * w] << T1_NMSEDEC_FRACBITS;
datap=t1->data;
cblk_w = t1->w;
cblk_h = t1->h;
tiledp=&tilec->data[(y * tile_w) + x];
if (tccp->qmfbid == 1) {
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
int tmp = tiledp[(j * tile_w) + i];
datap[(j * cblk_w) + i] = tmp << T1_NMSEDEC_FRACBITS;
}
}
} else { /* if (tcp->tccps[compno].qmfbid == 0) */
for (j = 0; j < t1->h; ++j) {
for (i = 0; i < t1->w; ++i) {
t1->data[(j * t1->w) + i] =
} else { /* if (tccp->qmfbid == 0) */
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
int tmp = tiledp[(j * tile_w) + i];
datap[(j * cblk_w) + i] =
fix_mul(
tilec->data[x + i + (y + j) * w],
tmp,
8192 * 8192 / ((int) floor(band->stepsize * 8192))) >> (11 - T1_NMSEDEC_FRACBITS);
}
}
@@ -1093,9 +1102,9 @@ void t1_encode_cblks(
band->bandno,
compno,
tilec->numresolutions - 1 - resno,
tcp->tccps[compno].qmfbid,
tccp->qmfbid,
band->stepsize,
tcp->tccps[compno].cblksty,
tccp->cblksty,
tile->numcomps,
tile);
@@ -1107,87 +1116,86 @@ void t1_encode_cblks(
}
void t1_decode_cblks(
opj_t1_t *t1,
opj_tcd_tile_t *tile,
opj_tcp_t *tcp)
opj_t1_t* t1,
opj_tcd_tilecomp_t* tilec,
opj_tccp_t* tccp)
{
int compno, resno, bandno, precno, cblkno;
int resno, bandno, precno, cblkno;
for (compno = 0; compno < tile->numcomps; ++compno) {
opj_tcd_tilecomp_t *tilec = &tile->comps[compno];
int tile_w = tilec->x1 - tilec->x0;
for (resno = 0; resno < tilec->numresolutions; ++resno) {
opj_tcd_resolution_t *res = &tilec->resolutions[resno];
for (resno = 0; resno < tilec->numresolutions; ++resno) {
opj_tcd_resolution_t* res = &tilec->resolutions[resno];
for (bandno = 0; bandno < res->numbands; ++bandno) {
opj_tcd_band_t *band = &res->bands[bandno];
for (bandno = 0; bandno < res->numbands; ++bandno) {
opj_tcd_band_t* restrict band = &res->bands[bandno];
for (precno = 0; precno < res->pw * res->ph; ++precno) {
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (precno = 0; precno < res->pw * res->ph; ++precno) {
opj_tcd_precinct_t* prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
int x, y, w, i, j, cblk_w, cblk_h;
opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
opj_tcd_cblk_t* cblk = &prc->cblks[cblkno];
int* restrict datap;
void* restrict tiledp;
int cblk_w, cblk_h;
int x, y;
int i, j;
t1_decode_cblk(
t1,
cblk,
band->bandno,
tcp->tccps[compno].roishift,
tcp->tccps[compno].cblksty);
t1_decode_cblk(
t1,
cblk,
band->bandno,
tccp->roishift,
tccp->cblksty);
x = cblk->x0 - band->x0;
y = cblk->y0 - band->y0;
if (band->bandno & 1) {
opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
x += pres->x1 - pres->x0;
}
if (band->bandno & 2) {
opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
y += pres->y1 - pres->y0;
}
x = cblk->x0 - band->x0;
y = cblk->y0 - band->y0;
if (band->bandno & 1) {
opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
x += pres->x1 - pres->x0;
}
if (band->bandno & 2) {
opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
y += pres->y1 - pres->y0;
}
cblk_w = cblk->x1 - cblk->x0;
cblk_h = cblk->y1 - cblk->y0;
datap=t1->data;
cblk_w = t1->w;
cblk_h = t1->h;
if (tcp->tccps[compno].roishift) {
int thresh = 1 << tcp->tccps[compno].roishift;
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
int val = t1->data[(j * t1->w) + i];
int mag = int_abs(val);
if (mag >= thresh) {
mag >>= tcp->tccps[compno].roishift;
t1->data[(j * t1->w) + i] = val < 0 ? -mag : mag;
}
if (tccp->roishift) {
int thresh = 1 << tccp->roishift;
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
int val = datap[(j * cblk_w) + i];
int mag = abs(val);
if (mag >= thresh) {
mag >>= tccp->roishift;
datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
}
}
}
w = tilec->x1 - tilec->x0;
if (tcp->tccps[compno].qmfbid == 1) {
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
tilec->data[x + i + (y + j) * w] = t1->data[(j * t1->w) + i]/2;
}
}
} else { /* if (tcp->tccps[compno].qmfbid == 0) */
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
if (t1->data[(j * t1->w) + i] >> 1 == 0) {
tilec->data[x + i + (y + j) * w] = 0;
} else {
double tmp = (double)(t1->data[(j * t1->w) + i] * band->stepsize * 4096.0);
int tmp2 = ((int) (floor(fabs(tmp)))) + ((int) floor(fabs(tmp*2))%2);
tilec->data[x + i + (y + j) * w] = ((tmp<0)?-tmp2:tmp2);
}
}
}
tiledp=(void*)&tilec->data[(y * tile_w) + x];
if (tccp->qmfbid == 1) {
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
int tmp = datap[(j * cblk_w) + i];
((int*)tiledp)[(j * tile_w) + i] = tmp / 2;
}
}
} /* cblkno */
} /* precno */
} /* bandno */
} /* resno */
} /* compno */
} else { /* if (tccp->qmfbid == 0) */
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
float tmp = datap[(j * cblk_w) + i] * band->stepsize;
((float*)tiledp)[(j * tile_w) + i] = tmp;
}
}
}
} /* cblkno */
} /* precno */
} /* bandno */
} /* resno */
}