* introducing dct248 into the DSP context.
* simple/accurate implementation of dct248
* DV encoding now supports 2-4-8 DCT
* DV encoding gets a bit faster (but still miles away from what I think it could do)
* misc. DV codec cleanups

Originally committed as revision 2425 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
01a2ddaf85
commit
10acc47995
@ -44,6 +44,19 @@ const uint8_t ff_zigzag_direct[64] = {
|
|||||||
53, 60, 61, 54, 47, 55, 62, 63
|
53, 60, 61, 54, 47, 55, 62, 63
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Specific zigzag scan for the 248 (2-4-8) IDCT.  NOTE that unlike the
   specification, we interleave the fields.
   Entry i is the position inside the 64-coefficient block of the i-th
   coefficient in scan order; the table is a permutation of 0..63. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
|
||||||
|
|
||||||
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
|
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
|
||||||
uint16_t __align8 inv_zigzag_direct16[64];
|
uint16_t __align8 inv_zigzag_direct16[64];
|
||||||
|
|
||||||
@ -2869,12 +2882,18 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
if(avctx->dct_algo==FF_DCT_FASTINT)
|
if(avctx->dct_algo==FF_DCT_FASTINT) {
|
||||||
c->fdct = fdct_ifast;
|
c->fdct = fdct_ifast;
|
||||||
else if(avctx->dct_algo==FF_DCT_FAAN)
|
c->fdct248 = ff_fdct248_islow; // FIXME: need an optimized version
|
||||||
|
}
|
||||||
|
else if(avctx->dct_algo==FF_DCT_FAAN) {
|
||||||
c->fdct = ff_faandct;
|
c->fdct = ff_faandct;
|
||||||
else
|
c->fdct248 = ff_fdct248_islow; // FIXME: need an optimized version
|
||||||
|
}
|
||||||
|
else {
|
||||||
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
|
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
|
||||||
|
c->fdct248 = ff_fdct248_islow;
|
||||||
|
}
|
||||||
#endif //CONFIG_ENCODERS
|
#endif //CONFIG_ENCODERS
|
||||||
|
|
||||||
if(avctx->idct_algo==FF_IDCT_INT){
|
if(avctx->idct_algo==FF_IDCT_INT){
|
||||||
|
@ -37,6 +37,7 @@ typedef short DCTELEM;
|
|||||||
|
|
||||||
void fdct_ifast (DCTELEM *data);
|
void fdct_ifast (DCTELEM *data);
|
||||||
void ff_jpeg_fdct_islow (DCTELEM *data);
|
void ff_jpeg_fdct_islow (DCTELEM *data);
|
||||||
|
void ff_fdct248_islow (DCTELEM *data);
|
||||||
|
|
||||||
void j_rev_dct (DCTELEM *data);
|
void j_rev_dct (DCTELEM *data);
|
||||||
|
|
||||||
@ -47,6 +48,7 @@ void ff_fdct_mmx2(DCTELEM *block);
|
|||||||
extern const uint8_t ff_alternate_horizontal_scan[64];
|
extern const uint8_t ff_alternate_horizontal_scan[64];
|
||||||
extern const uint8_t ff_alternate_vertical_scan[64];
|
extern const uint8_t ff_alternate_vertical_scan[64];
|
||||||
extern const uint8_t ff_zigzag_direct[64];
|
extern const uint8_t ff_zigzag_direct[64];
|
||||||
|
extern const uint8_t ff_zigzag248_direct[64];
|
||||||
|
|
||||||
/* pixel operations */
|
/* pixel operations */
|
||||||
#define MAX_NEG_CROP 384
|
#define MAX_NEG_CROP 384
|
||||||
@ -244,6 +246,7 @@ typedef struct DSPContext {
|
|||||||
|
|
||||||
/* (I)DCT */
|
/* (I)DCT */
|
||||||
void (*fdct)(DCTELEM *block/* align 16*/);
|
void (*fdct)(DCTELEM *block/* align 16*/);
|
||||||
|
void (*fdct248)(DCTELEM *block/* align 16*/);
|
||||||
|
|
||||||
/* IDCT really*/
|
/* IDCT really*/
|
||||||
void (*idct)(DCTELEM *block/* align 16*/);
|
void (*idct)(DCTELEM *block/* align 16*/);
|
||||||
|
135
libavcodec/dv.c
135
libavcodec/dv.c
@ -35,19 +35,18 @@
|
|||||||
|
|
||||||
typedef struct DVVideoDecodeContext {
|
typedef struct DVVideoDecodeContext {
|
||||||
const DVprofile* sys;
|
const DVprofile* sys;
|
||||||
GetBitContext gb;
|
|
||||||
AVFrame picture;
|
AVFrame picture;
|
||||||
DCTELEM block[5*6][64] __align8;
|
|
||||||
|
|
||||||
/* FIXME: the following is extracted from DSP */
|
|
||||||
uint8_t dv_zigzag[2][64];
|
uint8_t dv_zigzag[2][64];
|
||||||
uint8_t idct_permutation[64];
|
uint8_t dv_idct_shift[2][22][64];
|
||||||
|
uint8_t dv_dct_shift[2][22][64];
|
||||||
|
|
||||||
void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
|
void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
|
||||||
void (*fdct)(DCTELEM *block);
|
void (*fdct[2])(DCTELEM *block);
|
||||||
|
|
||||||
/* XXX: move it to static storage ? */
|
|
||||||
uint8_t dv_shift[2][22][64];
|
|
||||||
void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
|
void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
|
|
||||||
|
GetBitContext gb;
|
||||||
|
DCTELEM block[5*6][64] __align8;
|
||||||
} DVVideoDecodeContext;
|
} DVVideoDecodeContext;
|
||||||
|
|
||||||
#define TEX_VLC_BITS 9
|
#define TEX_VLC_BITS 9
|
||||||
@ -55,25 +54,29 @@ typedef struct DVVideoDecodeContext {
|
|||||||
static RL_VLC_ELEM *dv_rl_vlc[1];
|
static RL_VLC_ELEM *dv_rl_vlc[1];
|
||||||
static VLC_TYPE dv_vlc_codes[15][23];
|
static VLC_TYPE dv_vlc_codes[15][23];
|
||||||
|
|
||||||
static void dv_build_unquantize_tables(DVVideoDecodeContext *s)
|
static void dv_build_unquantize_tables(DVVideoDecodeContext *s, uint8_t* perm)
|
||||||
{
|
{
|
||||||
int i, q, j;
|
int i, q, j;
|
||||||
|
|
||||||
/* NOTE: max left shift is 6 */
|
/* NOTE: max left shift is 6 */
|
||||||
for(q = 0; q < 22; q++) {
|
for(q = 0; q < 22; q++) {
|
||||||
/* 88 unquant */
|
/* 88DCT */
|
||||||
for(i = 1; i < 64; i++) {
|
for(i = 1; i < 64; i++) {
|
||||||
/* 88 table */
|
/* 88 table */
|
||||||
j = s->idct_permutation[i];
|
j = perm[i];
|
||||||
s->dv_shift[0][q][j] =
|
s->dv_idct_shift[0][q][j] =
|
||||||
dv_quant_shifts[q][dv_88_areas[i]] + 1;
|
dv_quant_shifts[q][dv_88_areas[i]] + 1;
|
||||||
|
s->dv_dct_shift[0][q][i] =
|
||||||
|
dv_quant_shifts[q][dv_88_areas[ff_zigzag_direct[i]]] + 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 248 unquant */
|
/* 248DCT */
|
||||||
for(i = 1; i < 64; i++) {
|
for(i = 1; i < 64; i++) {
|
||||||
/* 248 table */
|
/* 248 table */
|
||||||
s->dv_shift[1][q][i] =
|
s->dv_idct_shift[1][q][i] =
|
||||||
dv_quant_shifts[q][dv_248_areas[i]] + 1;
|
dv_quant_shifts[q][dv_248_areas[i]] + 1;
|
||||||
|
s->dv_dct_shift[1][q][i] =
|
||||||
|
dv_quant_shifts[q][dv_248_areas[ff_zigzag248_direct[i]]] + 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -81,8 +84,9 @@ static void dv_build_unquantize_tables(DVVideoDecodeContext *s)
|
|||||||
static int dvvideo_init(AVCodecContext *avctx)
|
static int dvvideo_init(AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
DVVideoDecodeContext *s = avctx->priv_data;
|
DVVideoDecodeContext *s = avctx->priv_data;
|
||||||
MpegEncContext s2;
|
DSPContext dsp;
|
||||||
static int done=0;
|
static int done=0;
|
||||||
|
int i;
|
||||||
|
|
||||||
if (!done) {
|
if (!done) {
|
||||||
int i;
|
int i;
|
||||||
@ -124,27 +128,23 @@ static int dvvideo_init(AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ugly way to get the idct & scantable */
|
/* Generic DSP setup */
|
||||||
/* XXX: fix it */
|
dsputil_init(&dsp, avctx);
|
||||||
memset(&s2, 0, sizeof(MpegEncContext));
|
s->get_pixels = dsp.get_pixels;
|
||||||
s2.avctx = avctx;
|
|
||||||
dsputil_init(&s2.dsp, avctx);
|
|
||||||
if (DCT_common_init(&s2) < 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
s->get_pixels = s2.dsp.get_pixels;
|
/* 88DCT setup */
|
||||||
s->fdct = s2.dsp.fdct;
|
s->fdct[0] = dsp.fdct;
|
||||||
|
s->idct_put[0] = dsp.idct_put;
|
||||||
s->idct_put[0] = s2.dsp.idct_put;
|
for (i=0; i<64; i++)
|
||||||
memcpy(s->idct_permutation, s2.dsp.idct_permutation, 64);
|
s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]];
|
||||||
memcpy(s->dv_zigzag[0], s2.intra_scantable.permutated, 64);
|
|
||||||
|
|
||||||
/* XXX: use MMX also for idct248 */
|
/* 248DCT setup */
|
||||||
s->idct_put[1] = simple_idct248_put;
|
s->fdct[1] = dsp.fdct248;
|
||||||
memcpy(s->dv_zigzag[1], dv_248_zigzag, 64);
|
s->idct_put[1] = simple_idct248_put; // FIXME: need to add it to DSP
|
||||||
|
memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
|
||||||
|
|
||||||
/* XXX: do it only for constant case */
|
/* XXX: do it only for constant case */
|
||||||
dv_build_unquantize_tables(s);
|
dv_build_unquantize_tables(s, dsp.idct_permutation);
|
||||||
|
|
||||||
/* FIXME: I really don't think this should be here */
|
/* FIXME: I really don't think this should be here */
|
||||||
if (dv_codec_profile(avctx))
|
if (dv_codec_profile(avctx))
|
||||||
@ -367,7 +367,7 @@ static inline void dv_decode_video_segment(DVVideoDecodeContext *s,
|
|||||||
mb->scan_table = s->dv_zigzag[dct_mode];
|
mb->scan_table = s->dv_zigzag[dct_mode];
|
||||||
class1 = get_bits(&s->gb, 2);
|
class1 = get_bits(&s->gb, 2);
|
||||||
mb->shift_offset = (class1 == 3);
|
mb->shift_offset = (class1 == 3);
|
||||||
mb->shift_table = s->dv_shift[dct_mode]
|
mb->shift_table = s->dv_idct_shift[dct_mode]
|
||||||
[quant + dv_quant_offset[class1]];
|
[quant + dv_quant_offset[class1]];
|
||||||
dc = dc << 2;
|
dc = dc << 2;
|
||||||
/* convert to unsigned because 128 is not added in the
|
/* convert to unsigned because 128 is not added in the
|
||||||
@ -571,6 +571,8 @@ typedef struct EncBlockInfo {
|
|||||||
int block_size;
|
int block_size;
|
||||||
DCTELEM *mb;
|
DCTELEM *mb;
|
||||||
PutBitContext pb;
|
PutBitContext pb;
|
||||||
|
const uint8_t* zigzag_scan;
|
||||||
|
uint8_t *dv_shift;
|
||||||
} EncBlockInfo;
|
} EncBlockInfo;
|
||||||
|
|
||||||
static inline int dv_bits_left(EncBlockInfo* bi)
|
static inline int dv_bits_left(EncBlockInfo* bi)
|
||||||
@ -583,11 +585,10 @@ static inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* heap)
|
|||||||
int i, level, size, run = 0;
|
int i, level, size, run = 0;
|
||||||
uint32_t vlc;
|
uint32_t vlc;
|
||||||
PutBitContext* cpb = &bi->pb;
|
PutBitContext* cpb = &bi->pb;
|
||||||
|
int bias = (bi->cno == 3);
|
||||||
|
|
||||||
for (i=1; i<64; i++) {
|
for (i=1; i<64; i++) {
|
||||||
level = bi->mb[ff_zigzag_direct[i]] /
|
level = bi->mb[bi->zigzag_scan[i]] / (1<<(bi->dv_shift[i] + bias));
|
||||||
(1<<(dv_quant_shifts[bi->qno + dv_quant_offset[bi->cno]]
|
|
||||||
[dv_88_areas[ff_zigzag_direct[i]]] + 4 + (bi->cno == 3)));
|
|
||||||
if (level != 0) {
|
if (level != 0) {
|
||||||
size = dv_rl2vlc(run, level, &vlc);
|
size = dv_rl2vlc(run, level, &vlc);
|
||||||
put_vlc:
|
put_vlc:
|
||||||
@ -663,11 +664,26 @@ static inline void dv_set_class_number(EncBlockInfo* bi, int j)
|
|||||||
bi->cno = 3;
|
bi->cno = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define SQ(a) ((a)*(a))
|
||||||
|
static int dv_score_lines(DCTELEM *s, int stride) {
|
||||||
|
int score=0;
|
||||||
|
int x, y;
|
||||||
|
|
||||||
|
for(y=0; y<4; y++) {
|
||||||
|
for(x=0; x<8; x+=4){
|
||||||
|
score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride])
|
||||||
|
+SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
|
||||||
|
}
|
||||||
|
s+= stride;
|
||||||
|
}
|
||||||
|
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a very rough initial implementation. The performance is
|
* This is a very rough initial implementation. The performance is
|
||||||
* horrible and some features are missing, mainly 2-4-8 DCT encoding.
|
* horrible and the weighting is missing. But it's missing from the
|
||||||
* The weighting is missing as well, but it's missing from the decoding
|
* decoding step also -- so at least we're on the same page with decoder ;-)
|
||||||
* step also -- so at least we're on the same page with decoder ;-)
|
|
||||||
*/
|
*/
|
||||||
static inline void dv_encode_video_segment(DVVideoDecodeContext *s,
|
static inline void dv_encode_video_segment(DVVideoDecodeContext *s,
|
||||||
uint8_t *dif,
|
uint8_t *dif,
|
||||||
@ -691,6 +707,7 @@ static inline void dv_encode_video_segment(DVVideoDecodeContext *s,
|
|||||||
|
|
||||||
/* Stage 1 -- doing DCT on 5 MBs */
|
/* Stage 1 -- doing DCT on 5 MBs */
|
||||||
block = &s->block[0][0];
|
block = &s->block[0][0];
|
||||||
|
enc_blk = &enc_blks[0];
|
||||||
for(mb_index = 0; mb_index < 5; mb_index++) {
|
for(mb_index = 0; mb_index < 5; mb_index++) {
|
||||||
v = *mb_pos_ptr++;
|
v = *mb_pos_ptr++;
|
||||||
mb_x = v & 0xff;
|
mb_x = v & 0xff;
|
||||||
@ -731,36 +748,36 @@ static inline void dv_encode_video_segment(DVVideoDecodeContext *s,
|
|||||||
} else { /* Simple copy: 8x8 -> 8x8 */
|
} else { /* Simple copy: 8x8 -> 8x8 */
|
||||||
s->get_pixels(block, data, linesize);
|
s->get_pixels(block, data, linesize);
|
||||||
}
|
}
|
||||||
|
|
||||||
s->fdct(block);
|
if (dv_score_lines(block, 8) + dv_score_lines(block+8*4, 8) - 100 >
|
||||||
|
dv_score_lines(block, 16) + dv_score_lines(block+8, 16)) {
|
||||||
|
enc_blk->dct_mode = 1;
|
||||||
|
enc_blk->zigzag_scan = ff_zigzag248_direct;
|
||||||
|
} else {
|
||||||
|
enc_blk->dct_mode = 0;
|
||||||
|
enc_blk->zigzag_scan = ff_zigzag_direct;
|
||||||
|
}
|
||||||
|
enc_blk->mb = block;
|
||||||
|
enc_blk->block_size = block_sizes[j];
|
||||||
|
|
||||||
|
s->fdct[enc_blk->dct_mode](block);
|
||||||
|
|
||||||
|
dv_set_class_number(enc_blk, j/4*(j%2));
|
||||||
|
|
||||||
block += 64;
|
block += 64;
|
||||||
|
enc_blk++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stage 2 -- setup for encoding phase */
|
/* Stage 2 -- encoding by trial-and-error */
|
||||||
enc_blk = &enc_blks[0];
|
|
||||||
block = &s->block[0][0];
|
|
||||||
for (i=0; i<5; i++) {
|
|
||||||
for (j=0; j<6; j++) {
|
|
||||||
enc_blk->mb = block;
|
|
||||||
enc_blk->dct_mode = 0;
|
|
||||||
enc_blk->block_size = block_sizes[j];
|
|
||||||
|
|
||||||
dv_set_class_number(enc_blk, j/4*(j%2));
|
|
||||||
|
|
||||||
block += 64;
|
|
||||||
enc_blk++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Stage 3 -- encoding by trial-and-error */
|
|
||||||
encode_vs:
|
encode_vs:
|
||||||
enc_blk = &enc_blks[0];
|
enc_blk = &enc_blks[0];
|
||||||
for (i=0; i<5; i++) {
|
for (i=0; i<5; i++) {
|
||||||
uint8_t* p = dif + i*80 + 4;
|
uint8_t* p = dif + i*80 + 4;
|
||||||
for (j=0; j<6; j++) {
|
for (j=0; j<6; j++) {
|
||||||
enc_blk->qno = QNO;
|
enc_blk->qno = QNO;
|
||||||
|
enc_blk->dv_shift = &(s->dv_dct_shift[0]
|
||||||
|
[QNO + dv_quant_offset[enc_blk->cno]][0]);
|
||||||
init_put_bits(&enc_blk->pb, p, block_sizes[j]/8);
|
init_put_bits(&enc_blk->pb, p, block_sizes[j]/8);
|
||||||
enc_blk++;
|
enc_blk++;
|
||||||
p += block_sizes[j]/8;
|
p += block_sizes[j]/8;
|
||||||
|
@ -270,19 +270,6 @@ static const uint8_t dv_vlc_level[409] = {
|
|||||||
0,
|
0,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Specific zigzag scan for 248 idct. NOTE that unlike the
|
|
||||||
specification, we interleave the fields */
|
|
||||||
static const uint8_t dv_248_zigzag[64] = {
|
|
||||||
0, 8, 1, 9, 16, 24, 2, 10,
|
|
||||||
17, 25, 32, 40, 48, 56, 33, 41,
|
|
||||||
18, 26, 3, 11, 4, 12, 19, 27,
|
|
||||||
34, 42, 49, 57, 50, 58, 35, 43,
|
|
||||||
20, 28, 5, 13, 6, 14, 21, 29,
|
|
||||||
36, 44, 51, 59, 52, 60, 37, 45,
|
|
||||||
22, 30, 7, 15, 23, 31, 38, 46,
|
|
||||||
53, 61, 54, 62, 39, 47, 55, 63,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* unquant tables (not used directly) */
|
/* unquant tables (not used directly) */
|
||||||
static const uint8_t dv_88_areas[64] = {
|
static const uint8_t dv_88_areas[64] = {
|
||||||
0,0,0,1,1,1,2,2,
|
0,0,0,1,1,1,2,2,
|
||||||
|
@ -295,3 +295,130 @@ ff_jpeg_fdct_islow (DCTELEM * data)
|
|||||||
dataptr++; /* advance pointer to next column */
|
dataptr++; /* advance pointer to next column */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT
|
||||||
|
* on the rows and then, instead of doing even and odd, part on the colums
|
||||||
|
* you do even part two times.
|
||||||
|
*/
|
||||||
|
GLOBAL(void)
|
||||||
|
ff_fdct248_islow (DCTELEM * data)
|
||||||
|
{
|
||||||
|
int32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||||
|
int32_t tmp10, tmp11, tmp12, tmp13;
|
||||||
|
int32_t z1, z2, z3, z4, z5;
|
||||||
|
DCTELEM *dataptr;
|
||||||
|
int ctr;
|
||||||
|
SHIFT_TEMPS
|
||||||
|
|
||||||
|
/* Pass 1: process rows. */
|
||||||
|
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
|
||||||
|
/* furthermore, we scale the results by 2**PASS1_BITS. */
|
||||||
|
|
||||||
|
dataptr = data;
|
||||||
|
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
|
||||||
|
tmp0 = dataptr[0] + dataptr[7];
|
||||||
|
tmp7 = dataptr[0] - dataptr[7];
|
||||||
|
tmp1 = dataptr[1] + dataptr[6];
|
||||||
|
tmp6 = dataptr[1] - dataptr[6];
|
||||||
|
tmp2 = dataptr[2] + dataptr[5];
|
||||||
|
tmp5 = dataptr[2] - dataptr[5];
|
||||||
|
tmp3 = dataptr[3] + dataptr[4];
|
||||||
|
tmp4 = dataptr[3] - dataptr[4];
|
||||||
|
|
||||||
|
/* Even part per LL&M figure 1 --- note that published figure is faulty;
|
||||||
|
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
|
||||||
|
*/
|
||||||
|
|
||||||
|
tmp10 = tmp0 + tmp3;
|
||||||
|
tmp13 = tmp0 - tmp3;
|
||||||
|
tmp11 = tmp1 + tmp2;
|
||||||
|
tmp12 = tmp1 - tmp2;
|
||||||
|
|
||||||
|
dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
|
||||||
|
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
|
||||||
|
|
||||||
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
||||||
|
dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||||
|
CONST_BITS-PASS1_BITS);
|
||||||
|
dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||||
|
CONST_BITS-PASS1_BITS);
|
||||||
|
|
||||||
|
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
|
||||||
|
* cK represents cos(K*pi/16).
|
||||||
|
* i0..i3 in the paper are tmp4..tmp7 here.
|
||||||
|
*/
|
||||||
|
|
||||||
|
z1 = tmp4 + tmp7;
|
||||||
|
z2 = tmp5 + tmp6;
|
||||||
|
z3 = tmp4 + tmp6;
|
||||||
|
z4 = tmp5 + tmp7;
|
||||||
|
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
|
||||||
|
|
||||||
|
tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
|
||||||
|
tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
|
||||||
|
tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
|
||||||
|
tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
|
||||||
|
z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
|
||||||
|
z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
|
||||||
|
z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
|
||||||
|
z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
|
||||||
|
|
||||||
|
z3 += z5;
|
||||||
|
z4 += z5;
|
||||||
|
|
||||||
|
dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
|
||||||
|
dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
|
||||||
|
dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
|
||||||
|
dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
|
||||||
|
|
||||||
|
dataptr += DCTSIZE; /* advance pointer to next row */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pass 2: process columns.
|
||||||
|
* We remove the PASS1_BITS scaling, but leave the results scaled up
|
||||||
|
* by an overall factor of 8.
|
||||||
|
*/
|
||||||
|
|
||||||
|
dataptr = data;
|
||||||
|
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
|
||||||
|
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
|
||||||
|
tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
|
||||||
|
tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
|
||||||
|
tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
|
||||||
|
tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
|
||||||
|
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
|
||||||
|
tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
|
||||||
|
tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
|
||||||
|
|
||||||
|
tmp10 = tmp0 + tmp3;
|
||||||
|
tmp11 = tmp1 + tmp2;
|
||||||
|
tmp12 = tmp1 - tmp2;
|
||||||
|
tmp13 = tmp0 - tmp3;
|
||||||
|
|
||||||
|
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
|
||||||
|
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
|
||||||
|
|
||||||
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
||||||
|
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||||
|
CONST_BITS+PASS1_BITS);
|
||||||
|
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||||
|
CONST_BITS+PASS1_BITS);
|
||||||
|
|
||||||
|
tmp10 = tmp4 + tmp7;
|
||||||
|
tmp11 = tmp5 + tmp6;
|
||||||
|
tmp12 = tmp5 - tmp6;
|
||||||
|
tmp13 = tmp4 - tmp7;
|
||||||
|
|
||||||
|
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
|
||||||
|
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
|
||||||
|
|
||||||
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
||||||
|
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||||
|
CONST_BITS+PASS1_BITS);
|
||||||
|
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||||
|
CONST_BITS+PASS1_BITS);
|
||||||
|
|
||||||
|
dataptr++; /* advance pointer to next column */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user