Patch by Dzonatas and Callum Lerwick. Fp/vectorization patch which basically converts most of the irreversible decode codepath to floating point, eliminating a few rounds of int/fp conversion, resulting in a vast performance improvement, and an increase in accuracy.
This commit is contained in:
210
libopenjpeg/t1.c
210
libopenjpeg/t1.c
@@ -802,24 +802,22 @@ static void t1_encode_cblk(
|
||||
int numcomps,
|
||||
opj_tcd_tile_t * tile)
|
||||
{
|
||||
int i, j;
|
||||
int passno;
|
||||
int bpno, passtype;
|
||||
int max;
|
||||
int nmsedec = 0;
|
||||
double cumwmsedec = 0.0;
|
||||
|
||||
opj_mqc_t *mqc = t1->mqc; /* MQC component */
|
||||
|
||||
int passno, bpno, passtype;
|
||||
int nmsedec = 0;
|
||||
int i, max;
|
||||
char type = T1_TYPE_MQ;
|
||||
double tempwmsedec;
|
||||
|
||||
opj_mqc_t *mqc = t1->mqc; /* MQC component */
|
||||
|
||||
|
||||
max = 0;
|
||||
for (j = 0; j < t1->h; ++j) {
|
||||
for (i = 0; i < t1->w; ++i) {
|
||||
max = int_max(max, int_abs(t1->data[(j * t1->w) + i]));
|
||||
}
|
||||
for (i = 0; i < t1->w * t1->h; ++i) {
|
||||
int tmp = abs(t1->data[i]);
|
||||
max = int_max(max, tmp);
|
||||
}
|
||||
|
||||
|
||||
cblk->numbps = max ? (int_floorlog2(max) + 1) - T1_NMSEDEC_FRACBITS : 0;
|
||||
|
||||
bpno = cblk->numbps - 1;
|
||||
@@ -932,12 +930,12 @@ static void t1_decode_cblk(
|
||||
int roishift,
|
||||
int cblksty)
|
||||
{
|
||||
opj_raw_t *raw = t1->raw; /* RAW component */
|
||||
opj_mqc_t *mqc = t1->mqc; /* MQC component */
|
||||
|
||||
int bpno, passtype;
|
||||
int segno, passno;
|
||||
char type = T1_TYPE_MQ; /* BYPASS mode */
|
||||
|
||||
opj_raw_t *raw = t1->raw; /* RAW component */
|
||||
opj_mqc_t *mqc = t1->mqc; /* MQC component */
|
||||
|
||||
if(!allocate_buffers(
|
||||
t1,
|
||||
@@ -1034,23 +1032,29 @@ void t1_encode_cblks(
|
||||
tile->distotile = 0; /* fixed_quality */
|
||||
|
||||
for (compno = 0; compno < tile->numcomps; ++compno) {
|
||||
opj_tcd_tilecomp_t *tilec = &tile->comps[compno];
|
||||
opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
|
||||
opj_tccp_t* tccp = &tcp->tccps[compno];
|
||||
int tile_w = tilec->x1 - tilec->x0;
|
||||
|
||||
for (resno = 0; resno < tilec->numresolutions; ++resno) {
|
||||
opj_tcd_resolution_t *res = &tilec->resolutions[resno];
|
||||
|
||||
for (bandno = 0; bandno < res->numbands; ++bandno) {
|
||||
opj_tcd_band_t *band = &res->bands[bandno];
|
||||
opj_tcd_band_t* restrict band = &res->bands[bandno];
|
||||
|
||||
for (precno = 0; precno < res->pw * res->ph; ++precno) {
|
||||
opj_tcd_precinct_t *prc = &band->precincts[precno];
|
||||
|
||||
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
|
||||
int x, y, w, i, j;
|
||||
opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
|
||||
int* restrict datap;
|
||||
int* restrict tiledp;
|
||||
int cblk_w;
|
||||
int cblk_h;
|
||||
int i, j;
|
||||
|
||||
x = cblk->x0 - band->x0;
|
||||
y = cblk->y0 - band->y0;
|
||||
int x = cblk->x0 - band->x0;
|
||||
int y = cblk->y0 - band->y0;
|
||||
if (band->bandno & 1) {
|
||||
opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
|
||||
x += pres->x1 - pres->x0;
|
||||
@@ -1068,20 +1072,25 @@ void t1_encode_cblks(
|
||||
return;
|
||||
}
|
||||
|
||||
w = tilec->x1 - tilec->x0;
|
||||
if (tcp->tccps[compno].qmfbid == 1) {
|
||||
for (j = 0; j < t1->h; ++j) {
|
||||
for (i = 0; i < t1->w; ++i) {
|
||||
t1->data[(j * t1->w) + i] =
|
||||
tilec->data[(x + i) + (y + j) * w] << T1_NMSEDEC_FRACBITS;
|
||||
datap=t1->data;
|
||||
cblk_w = t1->w;
|
||||
cblk_h = t1->h;
|
||||
|
||||
tiledp=&tilec->data[(y * tile_w) + x];
|
||||
if (tccp->qmfbid == 1) {
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
int tmp = tiledp[(j * tile_w) + i];
|
||||
datap[(j * cblk_w) + i] = tmp << T1_NMSEDEC_FRACBITS;
|
||||
}
|
||||
}
|
||||
} else { /* if (tcp->tccps[compno].qmfbid == 0) */
|
||||
for (j = 0; j < t1->h; ++j) {
|
||||
for (i = 0; i < t1->w; ++i) {
|
||||
t1->data[(j * t1->w) + i] =
|
||||
} else { /* if (tccp->qmfbid == 0) */
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
int tmp = tiledp[(j * tile_w) + i];
|
||||
datap[(j * cblk_w) + i] =
|
||||
fix_mul(
|
||||
tilec->data[x + i + (y + j) * w],
|
||||
tmp,
|
||||
8192 * 8192 / ((int) floor(band->stepsize * 8192))) >> (11 - T1_NMSEDEC_FRACBITS);
|
||||
}
|
||||
}
|
||||
@@ -1093,9 +1102,9 @@ void t1_encode_cblks(
|
||||
band->bandno,
|
||||
compno,
|
||||
tilec->numresolutions - 1 - resno,
|
||||
tcp->tccps[compno].qmfbid,
|
||||
tccp->qmfbid,
|
||||
band->stepsize,
|
||||
tcp->tccps[compno].cblksty,
|
||||
tccp->cblksty,
|
||||
tile->numcomps,
|
||||
tile);
|
||||
|
||||
@@ -1107,87 +1116,86 @@ void t1_encode_cblks(
|
||||
}
|
||||
|
||||
void t1_decode_cblks(
|
||||
opj_t1_t *t1,
|
||||
opj_tcd_tile_t *tile,
|
||||
opj_tcp_t *tcp)
|
||||
opj_t1_t* t1,
|
||||
opj_tcd_tilecomp_t* tilec,
|
||||
opj_tccp_t* tccp)
|
||||
{
|
||||
int compno, resno, bandno, precno, cblkno;
|
||||
int resno, bandno, precno, cblkno;
|
||||
|
||||
for (compno = 0; compno < tile->numcomps; ++compno) {
|
||||
opj_tcd_tilecomp_t *tilec = &tile->comps[compno];
|
||||
int tile_w = tilec->x1 - tilec->x0;
|
||||
|
||||
for (resno = 0; resno < tilec->numresolutions; ++resno) {
|
||||
opj_tcd_resolution_t *res = &tilec->resolutions[resno];
|
||||
for (resno = 0; resno < tilec->numresolutions; ++resno) {
|
||||
opj_tcd_resolution_t* res = &tilec->resolutions[resno];
|
||||
|
||||
for (bandno = 0; bandno < res->numbands; ++bandno) {
|
||||
opj_tcd_band_t *band = &res->bands[bandno];
|
||||
for (bandno = 0; bandno < res->numbands; ++bandno) {
|
||||
opj_tcd_band_t* restrict band = &res->bands[bandno];
|
||||
|
||||
for (precno = 0; precno < res->pw * res->ph; ++precno) {
|
||||
opj_tcd_precinct_t *prc = &band->precincts[precno];
|
||||
for (precno = 0; precno < res->pw * res->ph; ++precno) {
|
||||
opj_tcd_precinct_t* prc = &band->precincts[precno];
|
||||
|
||||
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
|
||||
int x, y, w, i, j, cblk_w, cblk_h;
|
||||
opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
|
||||
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
|
||||
opj_tcd_cblk_t* cblk = &prc->cblks[cblkno];
|
||||
int* restrict datap;
|
||||
void* restrict tiledp;
|
||||
int cblk_w, cblk_h;
|
||||
int x, y;
|
||||
int i, j;
|
||||
|
||||
t1_decode_cblk(
|
||||
t1,
|
||||
cblk,
|
||||
band->bandno,
|
||||
tcp->tccps[compno].roishift,
|
||||
tcp->tccps[compno].cblksty);
|
||||
t1_decode_cblk(
|
||||
t1,
|
||||
cblk,
|
||||
band->bandno,
|
||||
tccp->roishift,
|
||||
tccp->cblksty);
|
||||
|
||||
x = cblk->x0 - band->x0;
|
||||
y = cblk->y0 - band->y0;
|
||||
if (band->bandno & 1) {
|
||||
opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
|
||||
x += pres->x1 - pres->x0;
|
||||
}
|
||||
if (band->bandno & 2) {
|
||||
opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
|
||||
y += pres->y1 - pres->y0;
|
||||
}
|
||||
x = cblk->x0 - band->x0;
|
||||
y = cblk->y0 - band->y0;
|
||||
if (band->bandno & 1) {
|
||||
opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
|
||||
x += pres->x1 - pres->x0;
|
||||
}
|
||||
if (band->bandno & 2) {
|
||||
opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
|
||||
y += pres->y1 - pres->y0;
|
||||
}
|
||||
|
||||
cblk_w = cblk->x1 - cblk->x0;
|
||||
cblk_h = cblk->y1 - cblk->y0;
|
||||
datap=t1->data;
|
||||
cblk_w = t1->w;
|
||||
cblk_h = t1->h;
|
||||
|
||||
if (tcp->tccps[compno].roishift) {
|
||||
int thresh = 1 << tcp->tccps[compno].roishift;
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
int val = t1->data[(j * t1->w) + i];
|
||||
int mag = int_abs(val);
|
||||
if (mag >= thresh) {
|
||||
mag >>= tcp->tccps[compno].roishift;
|
||||
t1->data[(j * t1->w) + i] = val < 0 ? -mag : mag;
|
||||
}
|
||||
if (tccp->roishift) {
|
||||
int thresh = 1 << tccp->roishift;
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
int val = datap[(j * cblk_w) + i];
|
||||
int mag = abs(val);
|
||||
if (mag >= thresh) {
|
||||
mag >>= tccp->roishift;
|
||||
datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
w = tilec->x1 - tilec->x0;
|
||||
if (tcp->tccps[compno].qmfbid == 1) {
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
tilec->data[x + i + (y + j) * w] = t1->data[(j * t1->w) + i]/2;
|
||||
}
|
||||
}
|
||||
} else { /* if (tcp->tccps[compno].qmfbid == 0) */
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
if (t1->data[(j * t1->w) + i] >> 1 == 0) {
|
||||
tilec->data[x + i + (y + j) * w] = 0;
|
||||
} else {
|
||||
double tmp = (double)(t1->data[(j * t1->w) + i] * band->stepsize * 4096.0);
|
||||
int tmp2 = ((int) (floor(fabs(tmp)))) + ((int) floor(fabs(tmp*2))%2);
|
||||
tilec->data[x + i + (y + j) * w] = ((tmp<0)?-tmp2:tmp2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tiledp=(void*)&tilec->data[(y * tile_w) + x];
|
||||
if (tccp->qmfbid == 1) {
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
int tmp = datap[(j * cblk_w) + i];
|
||||
((int*)tiledp)[(j * tile_w) + i] = tmp / 2;
|
||||
}
|
||||
}
|
||||
} /* cblkno */
|
||||
} /* precno */
|
||||
} /* bandno */
|
||||
} /* resno */
|
||||
} /* compno */
|
||||
} else { /* if (tccp->qmfbid == 0) */
|
||||
for (j = 0; j < cblk_h; ++j) {
|
||||
for (i = 0; i < cblk_w; ++i) {
|
||||
float tmp = datap[(j * cblk_w) + i] * band->stepsize;
|
||||
((float*)tiledp)[(j * tile_w) + i] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
} /* cblkno */
|
||||
} /* precno */
|
||||
} /* bandno */
|
||||
} /* resno */
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user