Add the notion of pixel size to the H.264-related functions.
In high bit depth, pixels are stored in uint16_t instead of uint8_t as in the normal case, so the pixel size is 1 byte at normal bit depth and 2 bytes at high bit depth. This is a preparatory patch for high bit depth H.264 decoding support. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
parent
44ca80df34
commit
6e3ef511d7
4
ffplay.c
4
ffplay.c
@ -1577,6 +1577,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
|
|||||||
int perms = AV_PERM_WRITE;
|
int perms = AV_PERM_WRITE;
|
||||||
int i, w, h, stride[4];
|
int i, w, h, stride[4];
|
||||||
unsigned edge;
|
unsigned edge;
|
||||||
|
int pixel_size;
|
||||||
|
|
||||||
if (codec->codec->capabilities & CODEC_CAP_NEG_LINESIZES)
|
if (codec->codec->capabilities & CODEC_CAP_NEG_LINESIZES)
|
||||||
perms |= AV_PERM_NEG_LINESIZES;
|
perms |= AV_PERM_NEG_LINESIZES;
|
||||||
@ -1598,6 +1599,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
|
|||||||
if(!(ref = avfilter_get_video_buffer(ctx->outputs[0], perms, w, h)))
|
if(!(ref = avfilter_get_video_buffer(ctx->outputs[0], perms, w, h)))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
pixel_size = av_pix_fmt_descriptors[ref->format].comp[0].step_minus1+1;
|
||||||
ref->video->w = codec->width;
|
ref->video->w = codec->width;
|
||||||
ref->video->h = codec->height;
|
ref->video->h = codec->height;
|
||||||
for(i = 0; i < 4; i ++) {
|
for(i = 0; i < 4; i ++) {
|
||||||
@ -1605,7 +1607,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
|
|||||||
unsigned vshift = (i == 1 || i == 2) ? av_pix_fmt_descriptors[ref->format].log2_chroma_h : 0;
|
unsigned vshift = (i == 1 || i == 2) ? av_pix_fmt_descriptors[ref->format].log2_chroma_h : 0;
|
||||||
|
|
||||||
if (ref->data[i]) {
|
if (ref->data[i]) {
|
||||||
ref->data[i] += (edge >> hshift) + ((edge * ref->linesize[i]) >> vshift);
|
ref->data[i] += ((edge * pixel_size) >> hshift) + ((edge * ref->linesize[i]) >> vshift);
|
||||||
}
|
}
|
||||||
pic->data[i] = ref->data[i];
|
pic->data[i] = ref->data[i];
|
||||||
pic->linesize[i] = ref->linesize[i];
|
pic->linesize[i] = ref->linesize[i];
|
||||||
|
@ -314,12 +314,13 @@ static void chroma_dc_dct_c(DCTELEM *block){
|
|||||||
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
|
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
|
||||||
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
|
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
|
||||||
int src_x_offset, int src_y_offset,
|
int src_x_offset, int src_y_offset,
|
||||||
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
|
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
|
||||||
|
int pixel_shift){
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
|
const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
|
||||||
int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
|
int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
|
||||||
const int luma_xy= (mx&3) + ((my&3)<<2);
|
const int luma_xy= (mx&3) + ((my&3)<<2);
|
||||||
uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
|
uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
|
||||||
uint8_t * src_cb, * src_cr;
|
uint8_t * src_cb, * src_cr;
|
||||||
int extra_width= h->emu_edge_width;
|
int extra_width= h->emu_edge_width;
|
||||||
int extra_height= h->emu_edge_height;
|
int extra_height= h->emu_edge_height;
|
||||||
@ -336,8 +337,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
|
|||||||
|| full_my < 0-extra_height
|
|| full_my < 0-extra_height
|
||||||
|| full_mx + 16/*FIXME*/ > pic_width + extra_width
|
|| full_mx + 16/*FIXME*/ > pic_width + extra_width
|
||||||
|| full_my + 16/*FIXME*/ > pic_height + extra_height){
|
|| full_my + 16/*FIXME*/ > pic_height + extra_height){
|
||||||
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
|
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
|
||||||
src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
|
src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
|
||||||
emu=1;
|
emu=1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -353,8 +354,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
|
|||||||
my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
|
my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
|
||||||
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
|
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
|
||||||
}
|
}
|
||||||
src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
|
src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
|
||||||
src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
|
src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
|
||||||
|
|
||||||
if(emu){
|
if(emu){
|
||||||
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
|
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
|
||||||
@ -374,14 +375,14 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
|
|||||||
int x_offset, int y_offset,
|
int x_offset, int y_offset,
|
||||||
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
|
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
|
||||||
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
|
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
|
||||||
int list0, int list1){
|
int list0, int list1, int pixel_shift){
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
qpel_mc_func *qpix_op= qpix_put;
|
qpel_mc_func *qpix_op= qpix_put;
|
||||||
h264_chroma_mc_func chroma_op= chroma_put;
|
h264_chroma_mc_func chroma_op= chroma_put;
|
||||||
|
|
||||||
dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
|
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h-> mb_linesize;
|
||||||
dest_cb += x_offset + y_offset*h->mb_uvlinesize;
|
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
|
||||||
dest_cr += x_offset + y_offset*h->mb_uvlinesize;
|
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
|
||||||
x_offset += 8*s->mb_x;
|
x_offset += 8*s->mb_x;
|
||||||
y_offset += 8*(s->mb_y >> MB_FIELD);
|
y_offset += 8*(s->mb_y >> MB_FIELD);
|
||||||
|
|
||||||
@ -389,7 +390,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
|
|||||||
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
|
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
|
||||||
mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
|
mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
|
||||||
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
||||||
qpix_op, chroma_op);
|
qpix_op, chroma_op, pixel_shift);
|
||||||
|
|
||||||
qpix_op= qpix_avg;
|
qpix_op= qpix_avg;
|
||||||
chroma_op= chroma_avg;
|
chroma_op= chroma_avg;
|
||||||
@ -399,7 +400,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
|
|||||||
Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
|
Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
|
||||||
mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
|
mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
|
||||||
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
||||||
qpix_op, chroma_op);
|
qpix_op, chroma_op, pixel_shift);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -409,12 +410,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
|
|||||||
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
|
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
|
||||||
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
|
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
|
||||||
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
|
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
|
||||||
int list0, int list1){
|
int list0, int list1, int pixel_shift){
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
|
|
||||||
dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
|
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h-> mb_linesize;
|
||||||
dest_cb += x_offset + y_offset*h->mb_uvlinesize;
|
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
|
||||||
dest_cr += x_offset + y_offset*h->mb_uvlinesize;
|
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
|
||||||
x_offset += 8*s->mb_x;
|
x_offset += 8*s->mb_x;
|
||||||
y_offset += 8*(s->mb_y >> MB_FIELD);
|
y_offset += 8*(s->mb_y >> MB_FIELD);
|
||||||
|
|
||||||
@ -422,17 +423,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
|
|||||||
/* don't optimize for luma-only case, since B-frames usually
|
/* don't optimize for luma-only case, since B-frames usually
|
||||||
* use implicit weights => chroma too. */
|
* use implicit weights => chroma too. */
|
||||||
uint8_t *tmp_cb = s->obmc_scratchpad;
|
uint8_t *tmp_cb = s->obmc_scratchpad;
|
||||||
uint8_t *tmp_cr = s->obmc_scratchpad + 8;
|
uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift);
|
||||||
uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
|
uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
|
||||||
int refn0 = h->ref_cache[0][ scan8[n] ];
|
int refn0 = h->ref_cache[0][ scan8[n] ];
|
||||||
int refn1 = h->ref_cache[1][ scan8[n] ];
|
int refn1 = h->ref_cache[1][ scan8[n] ];
|
||||||
|
|
||||||
mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
|
mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
|
||||||
dest_y, dest_cb, dest_cr,
|
dest_y, dest_cb, dest_cr,
|
||||||
x_offset, y_offset, qpix_put, chroma_put);
|
x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
|
||||||
mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
|
mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
|
||||||
tmp_y, tmp_cb, tmp_cr,
|
tmp_y, tmp_cb, tmp_cr,
|
||||||
x_offset, y_offset, qpix_put, chroma_put);
|
x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
|
||||||
|
|
||||||
if(h->use_weight == 2){
|
if(h->use_weight == 2){
|
||||||
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
|
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
|
||||||
@ -457,7 +458,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
|
|||||||
Picture *ref= &h->ref_list[list][refn];
|
Picture *ref= &h->ref_list[list][refn];
|
||||||
mc_dir_part(h, ref, n, square, chroma_height, delta, list,
|
mc_dir_part(h, ref, n, square, chroma_height, delta, list,
|
||||||
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
||||||
qpix_put, chroma_put);
|
qpix_put, chroma_put, pixel_shift);
|
||||||
|
|
||||||
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
|
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
|
||||||
h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
|
h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
|
||||||
@ -476,19 +477,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
|
|||||||
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
|
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
|
||||||
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
|
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
|
||||||
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
|
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
|
||||||
int list0, int list1){
|
int list0, int list1, int pixel_shift){
|
||||||
if((h->use_weight==2 && list0 && list1
|
if((h->use_weight==2 && list0 && list1
|
||||||
&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
|
&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
|
||||||
|| h->use_weight==1)
|
|| h->use_weight==1)
|
||||||
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
|
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
|
||||||
x_offset, y_offset, qpix_put, chroma_put,
|
x_offset, y_offset, qpix_put, chroma_put,
|
||||||
weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
|
weight_op[0], weight_op[3], weight_avg[0],
|
||||||
|
weight_avg[3], list0, list1, pixel_shift);
|
||||||
else
|
else
|
||||||
mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
|
mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
|
||||||
x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
|
x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
|
||||||
|
chroma_avg, list0, list1, pixel_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void prefetch_motion(H264Context *h, int list){
|
static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
|
||||||
/* fetch pixels for estimated mv 4 macroblocks ahead
|
/* fetch pixels for estimated mv 4 macroblocks ahead
|
||||||
* optimized for 64byte cache lines */
|
* optimized for 64byte cache lines */
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
@ -497,48 +500,54 @@ static inline void prefetch_motion(H264Context *h, int list){
|
|||||||
const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
|
const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
|
||||||
const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
|
const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
|
||||||
uint8_t **src= h->ref_list[list][refn].data;
|
uint8_t **src= h->ref_list[list][refn].data;
|
||||||
int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
|
int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
|
||||||
s->dsp.prefetch(src[0]+off, s->linesize, 4);
|
s->dsp.prefetch(src[0]+off, s->linesize, 4);
|
||||||
off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
|
off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
|
||||||
s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
|
s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
|
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
|
||||||
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
|
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
|
||||||
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
|
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
|
||||||
h264_weight_func *weight_op, h264_biweight_func *weight_avg){
|
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
|
||||||
|
int pixel_shift){
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
const int mb_xy= h->mb_xy;
|
const int mb_xy= h->mb_xy;
|
||||||
const int mb_type= s->current_picture.mb_type[mb_xy];
|
const int mb_type= s->current_picture.mb_type[mb_xy];
|
||||||
|
|
||||||
assert(IS_INTER(mb_type));
|
assert(IS_INTER(mb_type));
|
||||||
|
|
||||||
prefetch_motion(h, 0);
|
prefetch_motion(h, 0, pixel_shift);
|
||||||
|
|
||||||
if(IS_16X16(mb_type)){
|
if(IS_16X16(mb_type)){
|
||||||
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
|
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
|
||||||
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
|
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
|
||||||
weight_op, weight_avg,
|
weight_op, weight_avg,
|
||||||
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
|
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
|
||||||
|
pixel_shift);
|
||||||
}else if(IS_16X8(mb_type)){
|
}else if(IS_16X8(mb_type)){
|
||||||
mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
|
mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
|
||||||
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
|
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
|
||||||
&weight_op[1], &weight_avg[1],
|
&weight_op[1], &weight_avg[1],
|
||||||
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
|
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
|
||||||
mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
|
pixel_shift);
|
||||||
|
mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
|
||||||
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
|
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
|
||||||
&weight_op[1], &weight_avg[1],
|
&weight_op[1], &weight_avg[1],
|
||||||
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
|
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
|
||||||
|
pixel_shift);
|
||||||
}else if(IS_8X16(mb_type)){
|
}else if(IS_8X16(mb_type)){
|
||||||
mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
|
mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
|
||||||
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
|
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
|
||||||
&weight_op[2], &weight_avg[2],
|
&weight_op[2], &weight_avg[2],
|
||||||
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
|
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
|
||||||
|
pixel_shift);
|
||||||
mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
|
mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
|
||||||
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
|
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
|
||||||
&weight_op[2], &weight_avg[2],
|
&weight_op[2], &weight_avg[2],
|
||||||
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
|
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
|
||||||
|
pixel_shift);
|
||||||
}else{
|
}else{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -554,25 +563,30 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
|
|||||||
mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
||||||
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
|
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
|
||||||
&weight_op[3], &weight_avg[3],
|
&weight_op[3], &weight_avg[3],
|
||||||
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
|
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
|
||||||
|
pixel_shift);
|
||||||
}else if(IS_SUB_8X4(sub_mb_type)){
|
}else if(IS_SUB_8X4(sub_mb_type)){
|
||||||
mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
||||||
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
|
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
|
||||||
&weight_op[4], &weight_avg[4],
|
&weight_op[4], &weight_avg[4],
|
||||||
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
|
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
|
||||||
mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
|
pixel_shift);
|
||||||
|
mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
|
||||||
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
|
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
|
||||||
&weight_op[4], &weight_avg[4],
|
&weight_op[4], &weight_avg[4],
|
||||||
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
|
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
|
||||||
|
pixel_shift);
|
||||||
}else if(IS_SUB_4X8(sub_mb_type)){
|
}else if(IS_SUB_4X8(sub_mb_type)){
|
||||||
mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
||||||
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
|
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
|
||||||
&weight_op[5], &weight_avg[5],
|
&weight_op[5], &weight_avg[5],
|
||||||
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
|
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
|
||||||
|
pixel_shift);
|
||||||
mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
|
mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
|
||||||
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
|
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
|
||||||
&weight_op[5], &weight_avg[5],
|
&weight_op[5], &weight_avg[5],
|
||||||
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
|
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
|
||||||
|
pixel_shift);
|
||||||
}else{
|
}else{
|
||||||
int j;
|
int j;
|
||||||
assert(IS_SUB_4X4(sub_mb_type));
|
assert(IS_SUB_4X4(sub_mb_type));
|
||||||
@ -582,15 +596,32 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
|
|||||||
mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
|
mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
|
||||||
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
|
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
|
||||||
&weight_op[6], &weight_avg[6],
|
&weight_op[6], &weight_avg[6],
|
||||||
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
|
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
|
||||||
|
pixel_shift);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
prefetch_motion(h, 1);
|
prefetch_motion(h, 1, pixel_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define hl_motion_fn(sh, bits) \
|
||||||
|
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
|
||||||
|
uint8_t *dest_y, \
|
||||||
|
uint8_t *dest_cb, uint8_t *dest_cr, \
|
||||||
|
qpel_mc_func (*qpix_put)[16], \
|
||||||
|
h264_chroma_mc_func (*chroma_put), \
|
||||||
|
qpel_mc_func (*qpix_avg)[16], \
|
||||||
|
h264_chroma_mc_func (*chroma_avg), \
|
||||||
|
h264_weight_func *weight_op, \
|
||||||
|
h264_biweight_func *weight_avg) \
|
||||||
|
{ \
|
||||||
|
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
|
||||||
|
qpix_avg, chroma_avg, weight_op, weight_avg, sh); \
|
||||||
|
}
|
||||||
|
hl_motion_fn(0, 8);
|
||||||
|
hl_motion_fn(1, 16);
|
||||||
|
|
||||||
static void free_tables(H264Context *h, int free_rbsp){
|
static void free_tables(H264Context *h, int free_rbsp){
|
||||||
int i;
|
int i;
|
||||||
@ -758,8 +789,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
|
|||||||
* Allocate buffers which are not shared amongst multiple threads.
|
* Allocate buffers which are not shared amongst multiple threads.
|
||||||
*/
|
*/
|
||||||
static int context_init(H264Context *h){
|
static int context_init(H264Context *h){
|
||||||
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
|
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
|
||||||
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
|
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
|
||||||
|
|
||||||
h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
|
h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
|
||||||
h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
|
h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
|
||||||
@ -861,6 +892,8 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
|
|||||||
|
|
||||||
ff_h264_decode_init_vlc();
|
ff_h264_decode_init_vlc();
|
||||||
|
|
||||||
|
h->pixel_shift = 0;
|
||||||
|
|
||||||
h->thread_context[0] = h;
|
h->thread_context[0] = h;
|
||||||
h->outputed_poc = INT_MIN;
|
h->outputed_poc = INT_MIN;
|
||||||
h->prev_poc_msb= 1<<16;
|
h->prev_poc_msb= 1<<16;
|
||||||
@ -888,6 +921,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
|
|||||||
int ff_h264_frame_start(H264Context *h){
|
int ff_h264_frame_start(H264Context *h){
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
int i;
|
int i;
|
||||||
|
const int pixel_shift = h->pixel_shift;
|
||||||
|
|
||||||
if(MPV_frame_start(s, s->avctx) < 0)
|
if(MPV_frame_start(s, s->avctx) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
@ -904,14 +938,14 @@ int ff_h264_frame_start(H264Context *h){
|
|||||||
assert(s->linesize && s->uvlinesize);
|
assert(s->linesize && s->uvlinesize);
|
||||||
|
|
||||||
for(i=0; i<16; i++){
|
for(i=0; i<16; i++){
|
||||||
h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
|
h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
|
||||||
h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
|
h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
|
||||||
}
|
}
|
||||||
for(i=0; i<4; i++){
|
for(i=0; i<4; i++){
|
||||||
h->block_offset[16+i]=
|
h->block_offset[16+i]=
|
||||||
h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
|
h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
|
||||||
h->block_offset[24+16+i]=
|
h->block_offset[24+16+i]=
|
||||||
h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
|
h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* can't be in alloc_tables because linesize isn't known there.
|
/* can't be in alloc_tables because linesize isn't known there.
|
||||||
@ -945,6 +979,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
|
|||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
uint8_t *top_border;
|
uint8_t *top_border;
|
||||||
int top_idx = 1;
|
int top_idx = 1;
|
||||||
|
const int pixel_shift = h->pixel_shift;
|
||||||
|
|
||||||
src_y -= linesize;
|
src_y -= linesize;
|
||||||
src_cb -= uvlinesize;
|
src_cb -= uvlinesize;
|
||||||
@ -955,11 +990,18 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
|
|||||||
if(!MB_MBAFF){
|
if(!MB_MBAFF){
|
||||||
top_border = h->top_borders[0][s->mb_x];
|
top_border = h->top_borders[0][s->mb_x];
|
||||||
AV_COPY128(top_border, src_y + 15*linesize);
|
AV_COPY128(top_border, src_y + 15*linesize);
|
||||||
|
if (pixel_shift)
|
||||||
|
AV_COPY128(top_border+16, src_y+15*linesize+16);
|
||||||
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
|
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
|
||||||
|
if (pixel_shift) {
|
||||||
|
AV_COPY128(top_border+32, src_cb+7*uvlinesize);
|
||||||
|
AV_COPY128(top_border+48, src_cr+7*uvlinesize);
|
||||||
|
} else {
|
||||||
AV_COPY64(top_border+16, src_cb+7*uvlinesize);
|
AV_COPY64(top_border+16, src_cb+7*uvlinesize);
|
||||||
AV_COPY64(top_border+24, src_cr+7*uvlinesize);
|
AV_COPY64(top_border+24, src_cr+7*uvlinesize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}else if(MB_MBAFF){
|
}else if(MB_MBAFF){
|
||||||
top_idx = 0;
|
top_idx = 0;
|
||||||
}else
|
}else
|
||||||
@ -970,14 +1012,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
|
|||||||
// There are two lines saved, the line above the the top macroblock of a pair,
|
// There are two lines saved, the line above the the top macroblock of a pair,
|
||||||
// and the line above the bottom macroblock
|
// and the line above the bottom macroblock
|
||||||
AV_COPY128(top_border, src_y + 16*linesize);
|
AV_COPY128(top_border, src_y + 16*linesize);
|
||||||
|
if (pixel_shift)
|
||||||
|
AV_COPY128(top_border+16, src_y+16*linesize+16);
|
||||||
|
|
||||||
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
|
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
|
||||||
|
if (pixel_shift) {
|
||||||
|
AV_COPY128(top_border+32, src_cb+8*uvlinesize);
|
||||||
|
AV_COPY128(top_border+48, src_cr+8*uvlinesize);
|
||||||
|
} else {
|
||||||
AV_COPY64(top_border+16, src_cb+8*uvlinesize);
|
AV_COPY64(top_border+16, src_cb+8*uvlinesize);
|
||||||
AV_COPY64(top_border+24, src_cr+8*uvlinesize);
|
AV_COPY64(top_border+24, src_cr+8*uvlinesize);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
|
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
|
||||||
|
uint8_t *src_cb, uint8_t *src_cr,
|
||||||
|
int linesize, int uvlinesize,
|
||||||
|
int xchg, int simple, int pixel_shift){
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
int deblock_left;
|
int deblock_left;
|
||||||
int deblock_top;
|
int deblock_top;
|
||||||
@ -1002,41 +1054,62 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c
|
|||||||
deblock_top = (s->mb_y > !!MB_FIELD);
|
deblock_top = (s->mb_y > !!MB_FIELD);
|
||||||
}
|
}
|
||||||
|
|
||||||
src_y -= linesize + 1;
|
src_y -= linesize + 1 + pixel_shift;
|
||||||
src_cb -= uvlinesize + 1;
|
src_cb -= uvlinesize + 1 + pixel_shift;
|
||||||
src_cr -= uvlinesize + 1;
|
src_cr -= uvlinesize + 1 + pixel_shift;
|
||||||
|
|
||||||
top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
|
top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
|
||||||
top_border = h->top_borders[top_idx][s->mb_x];
|
top_border = h->top_borders[top_idx][s->mb_x];
|
||||||
|
|
||||||
#define XCHG(a,b,xchg)\
|
#define XCHG(a,b,xchg)\
|
||||||
|
if (pixel_shift) {\
|
||||||
|
if (xchg) {\
|
||||||
|
AV_SWAP64(b+0,a+0);\
|
||||||
|
AV_SWAP64(b+8,a+8);\
|
||||||
|
} else {\
|
||||||
|
AV_COPY128(b,a); \
|
||||||
|
}\
|
||||||
|
} else \
|
||||||
if (xchg) AV_SWAP64(b,a);\
|
if (xchg) AV_SWAP64(b,a);\
|
||||||
else AV_COPY64(b,a);
|
else AV_COPY64(b,a);
|
||||||
|
|
||||||
if(deblock_top){
|
if(deblock_top){
|
||||||
if(deblock_left){
|
if(deblock_left){
|
||||||
XCHG(top_border_m1+8, src_y -7, 1);
|
XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
|
||||||
}
|
}
|
||||||
XCHG(top_border+0, src_y +1, xchg);
|
XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
|
||||||
XCHG(top_border+8, src_y +9, 1);
|
XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
|
||||||
if(s->mb_x+1 < s->mb_width){
|
if(s->mb_x+1 < s->mb_width){
|
||||||
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
|
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
|
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
|
||||||
if(deblock_top){
|
if(deblock_top){
|
||||||
if(deblock_left){
|
if(deblock_left){
|
||||||
XCHG(top_border_m1+16, src_cb -7, 1);
|
XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
|
||||||
XCHG(top_border_m1+24, src_cr -7, 1);
|
XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
|
||||||
}
|
}
|
||||||
XCHG(top_border+16, src_cb+1, 1);
|
XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
|
||||||
XCHG(top_border+24, src_cr+1, 1);
|
XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
/**
 * Read one DCT coefficient from a coefficient buffer.
 *
 * @param mb             coefficient buffer, declared as DCTELEM
 * @param high_bit_depth non-zero: treat mb as an array of int32_t and read a
 *                       32-bit value; zero: read a 16-bit value from mb itself
 * @param index          coefficient index (scaled by the element size of the
 *                       selected layout, not a byte offset)
 * @return the coefficient value as an int
 */
static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
|
||||||
|
if (high_bit_depth) {
|
||||||
|
return AV_RN32A(((int32_t*)mb) + index);
|
||||||
|
} else
|
||||||
|
return AV_RN16A(mb + index);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Store one DCT coefficient into a coefficient buffer.
 *
 * @param mb             coefficient buffer, declared as DCTELEM
 * @param high_bit_depth non-zero: treat mb as an array of int32_t and write a
 *                       32-bit value; zero: write a 16-bit value into mb itself
 * @param index          coefficient index (scaled by the element size of the
 *                       selected layout, not a byte offset)
 * @param value          value to store
 */
static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
|
||||||
|
if (high_bit_depth) {
|
||||||
|
AV_WN32A(((int32_t*)mb) + index, value);
|
||||||
|
} else
|
||||||
|
AV_WN16A(mb + index, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
const int mb_x= s->mb_x;
|
const int mb_x= s->mb_x;
|
||||||
const int mb_y= s->mb_y;
|
const int mb_y= s->mb_y;
|
||||||
@ -1052,12 +1125,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
|
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
|
||||||
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
|
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
|
||||||
|
|
||||||
dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
|
dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
|
||||||
dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
|
dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
|
||||||
dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
|
dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
|
||||||
|
|
||||||
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
|
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
|
||||||
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
|
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
|
||||||
|
|
||||||
h->list_counts[mb_xy]= h->list_count;
|
h->list_counts[mb_xy]= h->list_count;
|
||||||
|
|
||||||
@ -1094,6 +1167,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!simple && IS_INTRA_PCM(mb_type)) {
|
if (!simple && IS_INTRA_PCM(mb_type)) {
|
||||||
|
if (pixel_shift) {
|
||||||
|
const int bit_depth = h->sps.bit_depth_luma;
|
||||||
|
int j;
|
||||||
|
GetBitContext gb;
|
||||||
|
init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
|
||||||
|
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize);
|
||||||
|
for (j = 0; j < 16; j++)
|
||||||
|
tmp_y[j] = get_bits(&gb, bit_depth);
|
||||||
|
}
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
|
||||||
|
for (j = 0; j < 8; j++)
|
||||||
|
tmp_cb[j] = get_bits(&gb, bit_depth);
|
||||||
|
}
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
|
||||||
|
for (j = 0; j < 8; j++)
|
||||||
|
tmp_cr[j] = get_bits(&gb, bit_depth);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
for (i=0; i<16; i++) {
|
for (i=0; i<16; i++) {
|
||||||
memcpy(dest_y + i* linesize, h->mb + i*8, 16);
|
memcpy(dest_y + i* linesize, h->mb + i*8, 16);
|
||||||
}
|
}
|
||||||
@ -1101,10 +1196,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
|
memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
|
||||||
memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
|
memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if(IS_INTRA(mb_type)){
|
if(IS_INTRA(mb_type)){
|
||||||
if(h->deblocking_filter)
|
if(h->deblocking_filter)
|
||||||
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
|
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
|
||||||
|
|
||||||
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
|
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
|
||||||
h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
|
h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
|
||||||
@ -1125,16 +1221,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
uint8_t * const ptr= dest_y + block_offset[i];
|
uint8_t * const ptr= dest_y + block_offset[i];
|
||||||
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
|
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
|
||||||
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
|
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
|
||||||
h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
|
h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
|
||||||
}else{
|
}else{
|
||||||
const int nnz = h->non_zero_count_cache[ scan8[i] ];
|
const int nnz = h->non_zero_count_cache[ scan8[i] ];
|
||||||
h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
|
h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
|
||||||
(h->topright_samples_available<<i)&0x4000, linesize);
|
(h->topright_samples_available<<i)&0x4000, linesize);
|
||||||
if(nnz){
|
if(nnz){
|
||||||
if(nnz == 1 && h->mb[i*16])
|
if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
|
||||||
idct_dc_add(ptr, h->mb + i*16, linesize);
|
idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
|
||||||
else
|
else
|
||||||
idct_add (ptr, h->mb + i*16, linesize);
|
idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1151,18 +1247,24 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
|
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
|
||||||
|
|
||||||
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
|
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
|
||||||
h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
|
h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
|
||||||
}else{
|
}else{
|
||||||
uint8_t *topright;
|
uint8_t *topright;
|
||||||
int nnz, tr;
|
int nnz, tr;
|
||||||
|
uint64_t tr_high;
|
||||||
if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
|
if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
|
||||||
const int topright_avail= (h->topright_samples_available<<i)&0x8000;
|
const int topright_avail= (h->topright_samples_available<<i)&0x8000;
|
||||||
assert(mb_y || linesize <= block_offset[i]);
|
assert(mb_y || linesize <= block_offset[i]);
|
||||||
if(!topright_avail){
|
if(!topright_avail){
|
||||||
|
if (pixel_shift) {
|
||||||
|
tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
|
||||||
|
topright= (uint8_t*) &tr_high;
|
||||||
|
} else {
|
||||||
tr= ptr[3 - linesize]*0x01010101;
|
tr= ptr[3 - linesize]*0x01010101;
|
||||||
topright= (uint8_t*) &tr;
|
topright= (uint8_t*) &tr;
|
||||||
|
}
|
||||||
}else
|
}else
|
||||||
topright= ptr + 4 - linesize;
|
topright= ptr + (4 << pixel_shift) - linesize;
|
||||||
}else
|
}else
|
||||||
topright= NULL;
|
topright= NULL;
|
||||||
|
|
||||||
@ -1170,10 +1272,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
nnz = h->non_zero_count_cache[ scan8[i] ];
|
nnz = h->non_zero_count_cache[ scan8[i] ];
|
||||||
if(nnz){
|
if(nnz){
|
||||||
if(is_h264){
|
if(is_h264){
|
||||||
if(nnz == 1 && h->mb[i*16])
|
if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
|
||||||
idct_dc_add(ptr, h->mb + i*16, linesize);
|
idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
|
||||||
else
|
else
|
||||||
idct_add (ptr, h->mb + i*16, linesize);
|
idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize);
|
||||||
}else
|
}else
|
||||||
ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
|
ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
|
||||||
}
|
}
|
||||||
@ -1191,19 +1293,27 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
|
static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
|
||||||
8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
|
8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
|
||||||
for(i = 0; i < 16; i++)
|
for(i = 0; i < 16; i++)
|
||||||
h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
|
dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else
|
}else
|
||||||
ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
|
ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
|
||||||
}
|
}
|
||||||
if(h->deblocking_filter)
|
if(h->deblocking_filter)
|
||||||
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
|
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
|
||||||
}else if(is_h264){
|
}else if(is_h264){
|
||||||
hl_motion(h, dest_y, dest_cb, dest_cr,
|
if (pixel_shift) {
|
||||||
|
hl_motion_16(h, dest_y, dest_cb, dest_cr,
|
||||||
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
|
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
|
||||||
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
|
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
|
||||||
h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
|
h->h264dsp.weight_h264_pixels_tab,
|
||||||
|
h->h264dsp.biweight_h264_pixels_tab);
|
||||||
|
} else
|
||||||
|
hl_motion_8(h, dest_y, dest_cb, dest_cr,
|
||||||
|
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
|
||||||
|
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
|
||||||
|
h->h264dsp.weight_h264_pixels_tab,
|
||||||
|
h->h264dsp.biweight_h264_pixels_tab);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1215,8 +1325,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
|
h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
|
||||||
}else{
|
}else{
|
||||||
for(i=0; i<16; i++){
|
for(i=0; i<16; i++){
|
||||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
|
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
|
||||||
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
|
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
@ -1228,7 +1338,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
|
idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
|
||||||
for(i=0; i<16; i+=di){
|
for(i=0; i<16; i+=di){
|
||||||
if(h->non_zero_count_cache[ scan8[i] ]){
|
if(h->non_zero_count_cache[ scan8[i] ]){
|
||||||
idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
|
idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
@ -1253,21 +1363,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
uint8_t *dest[2] = {dest_cb, dest_cr};
|
uint8_t *dest[2] = {dest_cb, dest_cr};
|
||||||
if(transform_bypass){
|
if(transform_bypass){
|
||||||
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
|
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
|
||||||
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
|
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize);
|
||||||
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
|
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize);
|
||||||
}else{
|
}else{
|
||||||
idct_add = s->dsp.add_pixels4;
|
idct_add = s->dsp.add_pixels4;
|
||||||
for(i=16; i<16+8; i++){
|
for(i=16; i<16+8; i++){
|
||||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
|
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
|
||||||
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
|
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
if(is_h264){
|
if(is_h264){
|
||||||
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
|
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
|
||||||
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
|
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
|
||||||
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
|
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
|
||||||
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
||||||
h->h264dsp.h264_idct_add8(dest, block_offset,
|
h->h264dsp.h264_idct_add8(dest, block_offset,
|
||||||
h->mb, uvlinesize,
|
h->mb, uvlinesize,
|
||||||
h->non_zero_count_cache);
|
h->non_zero_count_cache);
|
||||||
@ -1291,15 +1401,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
/**
|
/**
|
||||||
* Process a macroblock; this case avoids checks for expensive uncommon cases.
|
* Process a macroblock; this case avoids checks for expensive uncommon cases.
|
||||||
*/
|
*/
|
||||||
static void hl_decode_mb_simple(H264Context *h){
|
#define hl_decode_mb_simple(sh, bits) \
|
||||||
hl_decode_mb_internal(h, 1);
|
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
|
||||||
|
hl_decode_mb_internal(h, 1, sh); \
|
||||||
}
|
}
|
||||||
|
/* Instantiate the simple path once per pixel_shift value: the macro pastes
 * "bits" into the function name and passes "sh" to hl_decode_mb_internal as a
 * literal, giving hl_decode_mb_simple_8 (pixel_shift 0) and
 * hl_decode_mb_simple_16 (pixel_shift 1). */
hl_decode_mb_simple(0, 8);
|
||||||
|
hl_decode_mb_simple(1, 16);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process a macroblock; this handles edge cases, such as interlacing.
|
* Process a macroblock; this handles edge cases, such as interlacing.
|
||||||
*/
|
*/
|
||||||
static void av_noinline hl_decode_mb_complex(H264Context *h){
|
static void av_noinline hl_decode_mb_complex(H264Context *h){
|
||||||
hl_decode_mb_internal(h, 0);
|
/* simple=0; unlike the hl_decode_mb_simple_* variants, pixel_shift is taken
 * from the context at run time rather than passed as a literal. */
hl_decode_mb_internal(h, 0, h->pixel_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_h264_hl_decode_mb(H264Context *h){
|
void ff_h264_hl_decode_mb(H264Context *h){
|
||||||
@ -1308,9 +1421,12 @@ void ff_h264_hl_decode_mb(H264Context *h){
|
|||||||
const int mb_type= s->current_picture.mb_type[mb_xy];
|
const int mb_type= s->current_picture.mb_type[mb_xy];
|
||||||
int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
|
int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
|
||||||
|
|
||||||
if (is_complex)
|
if (is_complex) {
|
||||||
hl_decode_mb_complex(h);
|
hl_decode_mb_complex(h);
|
||||||
else hl_decode_mb_simple(h);
|
} else if (h->pixel_shift) {
|
||||||
|
hl_decode_mb_simple_16(h);
|
||||||
|
} else
|
||||||
|
hl_decode_mb_simple_8(h);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pred_weight_table(H264Context *h){
|
static int pred_weight_table(H264Context *h){
|
||||||
@ -2432,6 +2548,7 @@ static void loop_filter(H264Context *h){
|
|||||||
int linesize, uvlinesize, mb_x, mb_y;
|
int linesize, uvlinesize, mb_x, mb_y;
|
||||||
const int end_mb_y= s->mb_y + FRAME_MBAFF;
|
const int end_mb_y= s->mb_y + FRAME_MBAFF;
|
||||||
const int old_slice_type= h->slice_type;
|
const int old_slice_type= h->slice_type;
|
||||||
|
const int pixel_shift = h->pixel_shift;
|
||||||
|
|
||||||
if(h->deblocking_filter) {
|
if(h->deblocking_filter) {
|
||||||
for(mb_x= 0; mb_x<s->mb_width; mb_x++){
|
for(mb_x= 0; mb_x<s->mb_width; mb_x++){
|
||||||
@ -2447,9 +2564,9 @@ static void loop_filter(H264Context *h){
|
|||||||
|
|
||||||
s->mb_x= mb_x;
|
s->mb_x= mb_x;
|
||||||
s->mb_y= mb_y;
|
s->mb_y= mb_y;
|
||||||
dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
|
dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
|
||||||
dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
|
dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
|
||||||
dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
|
dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
|
||||||
//FIXME simplify above
|
//FIXME simplify above
|
||||||
|
|
||||||
if (MB_FIELD) {
|
if (MB_FIELD) {
|
||||||
|
@ -265,6 +265,7 @@ typedef struct MMCO{
|
|||||||
typedef struct H264Context{
|
typedef struct H264Context{
|
||||||
MpegEncContext s;
|
MpegEncContext s;
|
||||||
H264DSPContext h264dsp;
|
H264DSPContext h264dsp;
|
||||||
|
int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264
|
||||||
int chroma_qp[2]; //QPc
|
int chroma_qp[2]; //QPc
|
||||||
|
|
||||||
int qp_thresh; ///< QP threshold to skip loopfilter
|
int qp_thresh; ///< QP threshold to skip loopfilter
|
||||||
@ -296,7 +297,7 @@ typedef struct H264Context{
|
|||||||
unsigned int top_samples_available;
|
unsigned int top_samples_available;
|
||||||
unsigned int topright_samples_available;
|
unsigned int topright_samples_available;
|
||||||
unsigned int left_samples_available;
|
unsigned int left_samples_available;
|
||||||
uint8_t (*top_borders[2])[16+2*8];
|
uint8_t (*top_borders[2])[(16+2*8)*2];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* non zero coeff count cache.
|
* non zero coeff count cache.
|
||||||
@ -406,9 +407,9 @@ typedef struct H264Context{
|
|||||||
GetBitContext *intra_gb_ptr;
|
GetBitContext *intra_gb_ptr;
|
||||||
GetBitContext *inter_gb_ptr;
|
GetBitContext *inter_gb_ptr;
|
||||||
|
|
||||||
DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
|
DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coefficient is int32_t in high depth, we need to reserve twice the space.
|
||||||
DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16];
|
DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
|
||||||
DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
|
DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cabac
|
* Cabac
|
||||||
|
@ -1100,47 +1100,54 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
#define STORE_BLOCK(type) \
|
||||||
uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
|
do { \
|
||||||
|
uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; \
|
||||||
|
\
|
||||||
|
int j= scantable[index[--coeff_count]]; \
|
||||||
|
\
|
||||||
|
if( get_cabac( CC, ctx ) == 0 ) { \
|
||||||
|
node_ctx = coeff_abs_level_transition[0][node_ctx]; \
|
||||||
|
if( is_dc ) { \
|
||||||
|
((type*)block)[j] = get_cabac_bypass_sign( CC, -1); \
|
||||||
|
}else{ \
|
||||||
|
((type*)block)[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; \
|
||||||
|
} \
|
||||||
|
} else { \
|
||||||
|
int coeff_abs = 2; \
|
||||||
|
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
|
||||||
|
node_ctx = coeff_abs_level_transition[1][node_ctx]; \
|
||||||
|
\
|
||||||
|
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
|
||||||
|
coeff_abs++; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
if( coeff_abs >= 15 ) { \
|
||||||
|
int j = 0; \
|
||||||
|
while( get_cabac_bypass( CC ) ) { \
|
||||||
|
j++; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
coeff_abs=1; \
|
||||||
|
while( j-- ) { \
|
||||||
|
coeff_abs += coeff_abs + get_cabac_bypass( CC ); \
|
||||||
|
} \
|
||||||
|
coeff_abs+= 14; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
if( is_dc ) { \
|
||||||
|
((type*)block)[j] = get_cabac_bypass_sign( CC, -coeff_abs ); \
|
||||||
|
}else{ \
|
||||||
|
((type*)block)[j] = ((int)(get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32)) >> 6; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} while ( coeff_count );
|
||||||
|
|
||||||
int j= scantable[index[--coeff_count]];
|
if (h->pixel_shift) {
|
||||||
|
STORE_BLOCK(int32_t)
|
||||||
if( get_cabac( CC, ctx ) == 0 ) {
|
|
||||||
node_ctx = coeff_abs_level_transition[0][node_ctx];
|
|
||||||
if( is_dc ) {
|
|
||||||
block[j] = get_cabac_bypass_sign( CC, -1);
|
|
||||||
}else{
|
|
||||||
block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
int coeff_abs = 2;
|
STORE_BLOCK(int16_t)
|
||||||
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
|
|
||||||
node_ctx = coeff_abs_level_transition[1][node_ctx];
|
|
||||||
|
|
||||||
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
|
|
||||||
coeff_abs++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( coeff_abs >= 15 ) {
|
|
||||||
int j = 0;
|
|
||||||
while( get_cabac_bypass( CC ) ) {
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
|
|
||||||
coeff_abs=1;
|
|
||||||
while( j-- ) {
|
|
||||||
coeff_abs += coeff_abs + get_cabac_bypass( CC );
|
|
||||||
}
|
|
||||||
coeff_abs+= 14;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( is_dc ) {
|
|
||||||
block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
|
|
||||||
}else{
|
|
||||||
block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while( coeff_count );
|
|
||||||
#ifdef CABAC_ON_STACK
|
#ifdef CABAC_ON_STACK
|
||||||
h->cabac.range = cc.range ;
|
h->cabac.range = cc.range ;
|
||||||
h->cabac.low = cc.low ;
|
h->cabac.low = cc.low ;
|
||||||
@ -1196,6 +1203,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
|
|||||||
int mb_xy;
|
int mb_xy;
|
||||||
int mb_type, partition_count, cbp = 0;
|
int mb_type, partition_count, cbp = 0;
|
||||||
int dct8x8_allowed= h->pps.transform_8x8_mode;
|
int dct8x8_allowed= h->pps.transform_8x8_mode;
|
||||||
|
const int pixel_shift = h->pixel_shift;
|
||||||
|
|
||||||
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
|
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
|
||||||
|
|
||||||
@ -1304,6 +1312,7 @@ decode_intra_mb:
|
|||||||
h->slice_table[ mb_xy ]= h->slice_num;
|
h->slice_table[ mb_xy ]= h->slice_num;
|
||||||
|
|
||||||
if(IS_INTRA_PCM(mb_type)) {
|
if(IS_INTRA_PCM(mb_type)) {
|
||||||
|
const int mb_size = (384*h->sps.bit_depth_luma) >> 3;
|
||||||
const uint8_t *ptr;
|
const uint8_t *ptr;
|
||||||
|
|
||||||
// We assume these blocks are very rare so we do not optimize it.
|
// We assume these blocks are very rare so we do not optimize it.
|
||||||
@ -1316,9 +1325,9 @@ decode_intra_mb:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// The pixels are stored in the same order as levels in h->mb array.
|
// The pixels are stored in the same order as levels in h->mb array.
|
||||||
memcpy(h->mb, ptr, 256); ptr+=256;
|
memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
|
||||||
if(CHROMA){
|
if(CHROMA){
|
||||||
memcpy(h->mb+128, ptr, 128); ptr+=128;
|
memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
|
||||||
}
|
}
|
||||||
|
|
||||||
ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
|
ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
|
||||||
@ -1652,13 +1661,15 @@ decode_intra_mb:
|
|||||||
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
|
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
|
||||||
AV_ZERO128(h->mb_luma_dc+0);
|
AV_ZERO128(h->mb_luma_dc+0);
|
||||||
AV_ZERO128(h->mb_luma_dc+8);
|
AV_ZERO128(h->mb_luma_dc+8);
|
||||||
|
AV_ZERO128(h->mb_luma_dc+16);
|
||||||
|
AV_ZERO128(h->mb_luma_dc+24);
|
||||||
decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
|
decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
|
||||||
|
|
||||||
if( cbp&15 ) {
|
if( cbp&15 ) {
|
||||||
qmul = h->dequant4_coeff[0][s->qscale];
|
qmul = h->dequant4_coeff[0][s->qscale];
|
||||||
for( i = 0; i < 16; i++ ) {
|
for( i = 0; i < 16; i++ ) {
|
||||||
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
|
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
|
||||||
decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
|
decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
|
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
|
||||||
@ -1668,7 +1679,7 @@ decode_intra_mb:
|
|||||||
for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
|
for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
|
||||||
if( cbp & (1<<i8x8) ) {
|
if( cbp & (1<<i8x8) ) {
|
||||||
if( IS_8x8DCT(mb_type) ) {
|
if( IS_8x8DCT(mb_type) ) {
|
||||||
decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8,
|
decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8,
|
||||||
scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
|
scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
|
||||||
} else {
|
} else {
|
||||||
qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
|
qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
|
||||||
@ -1676,7 +1687,7 @@ decode_intra_mb:
|
|||||||
const int index = 4*i8x8 + i4x4;
|
const int index = 4*i8x8 + i4x4;
|
||||||
//av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
|
//av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
|
||||||
//START_TIMER
|
//START_TIMER
|
||||||
decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16);
|
decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16);
|
||||||
//STOP_TIMER("decode_residual")
|
//STOP_TIMER("decode_residual")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1691,7 +1702,7 @@ decode_intra_mb:
|
|||||||
int c;
|
int c;
|
||||||
for( c = 0; c < 2; c++ ) {
|
for( c = 0; c < 2; c++ ) {
|
||||||
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
|
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
|
||||||
decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
|
decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1702,7 +1713,7 @@ decode_intra_mb:
|
|||||||
for( i = 0; i < 4; i++ ) {
|
for( i = 0; i < 4; i++ ) {
|
||||||
const int index = 16 + 4 * c + i;
|
const int index = 16 + 4 * c + i;
|
||||||
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
|
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
|
||||||
decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
|
decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -488,37 +488,44 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
|
|||||||
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
|
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
scantable += zeros_left + total_coeff - 1;
|
#define STORE_BLOCK(type) \
|
||||||
if(n >= LUMA_DC_BLOCK_INDEX){
|
scantable += zeros_left + total_coeff - 1; \
|
||||||
block[*scantable] = level[0];
|
if(n >= LUMA_DC_BLOCK_INDEX){ \
|
||||||
for(i=1;i<total_coeff && zeros_left > 0;i++) {
|
((type*)block)[*scantable] = level[0]; \
|
||||||
if(zeros_left < 7)
|
for(i=1;i<total_coeff && zeros_left > 0;i++) { \
|
||||||
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
|
if(zeros_left < 7) \
|
||||||
else
|
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
|
||||||
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
|
else \
|
||||||
zeros_left -= run_before;
|
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
|
||||||
scantable -= 1 + run_before;
|
zeros_left -= run_before; \
|
||||||
block[*scantable]= level[i];
|
scantable -= 1 + run_before; \
|
||||||
}
|
((type*)block)[*scantable]= level[i]; \
|
||||||
for(;i<total_coeff;i++) {
|
} \
|
||||||
scantable--;
|
for(;i<total_coeff;i++) { \
|
||||||
block[*scantable]= level[i];
|
scantable--; \
|
||||||
}
|
((type*)block)[*scantable]= level[i]; \
|
||||||
}else{
|
} \
|
||||||
block[*scantable] = (level[0] * qmul[*scantable] + 32)>>6;
|
}else{ \
|
||||||
for(i=1;i<total_coeff && zeros_left > 0;i++) {
|
((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
|
||||||
if(zeros_left < 7)
|
for(i=1;i<total_coeff && zeros_left > 0;i++) { \
|
||||||
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
|
if(zeros_left < 7) \
|
||||||
else
|
run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
|
||||||
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
|
else \
|
||||||
zeros_left -= run_before;
|
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
|
||||||
scantable -= 1 + run_before;
|
zeros_left -= run_before; \
|
||||||
block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
|
scantable -= 1 + run_before; \
|
||||||
}
|
((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
|
||||||
for(;i<total_coeff;i++) {
|
} \
|
||||||
scantable--;
|
for(;i<total_coeff;i++) { \
|
||||||
block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
|
scantable--; \
|
||||||
|
((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (h->pixel_shift) {
|
||||||
|
STORE_BLOCK(int32_t)
|
||||||
|
} else {
|
||||||
|
STORE_BLOCK(int16_t)
|
||||||
}
|
}
|
||||||
|
|
||||||
if(zeros_left<0){
|
if(zeros_left<0){
|
||||||
@ -535,6 +542,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
|
|||||||
int partition_count;
|
int partition_count;
|
||||||
unsigned int mb_type, cbp;
|
unsigned int mb_type, cbp;
|
||||||
int dct8x8_allowed= h->pps.transform_8x8_mode;
|
int dct8x8_allowed= h->pps.transform_8x8_mode;
|
||||||
|
const int pixel_shift = h->pixel_shift;
|
||||||
|
|
||||||
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
|
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
|
||||||
|
|
||||||
@ -605,7 +613,7 @@ decode_intra_mb:
|
|||||||
align_get_bits(&s->gb);
|
align_get_bits(&s->gb);
|
||||||
|
|
||||||
// The pixels are stored in the same order as levels in h->mb array.
|
// The pixels are stored in the same order as levels in h->mb array.
|
||||||
for(x=0; x < (CHROMA ? 384 : 256); x++){
|
for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
|
||||||
((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
|
((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -941,6 +949,8 @@ decode_intra_mb:
|
|||||||
if(IS_INTRA16x16(mb_type)){
|
if(IS_INTRA16x16(mb_type)){
|
||||||
AV_ZERO128(h->mb_luma_dc+0);
|
AV_ZERO128(h->mb_luma_dc+0);
|
||||||
AV_ZERO128(h->mb_luma_dc+8);
|
AV_ZERO128(h->mb_luma_dc+8);
|
||||||
|
AV_ZERO128(h->mb_luma_dc+16);
|
||||||
|
AV_ZERO128(h->mb_luma_dc+24);
|
||||||
if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
|
if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
|
||||||
return -1; //FIXME continue if partitioned and other return -1 too
|
return -1; //FIXME continue if partitioned and other return -1 too
|
||||||
}
|
}
|
||||||
@ -951,7 +961,7 @@ decode_intra_mb:
|
|||||||
for(i8x8=0; i8x8<4; i8x8++){
|
for(i8x8=0; i8x8<4; i8x8++){
|
||||||
for(i4x4=0; i4x4<4; i4x4++){
|
for(i4x4=0; i4x4<4; i4x4++){
|
||||||
const int index= i4x4 + 4*i8x8;
|
const int index= i4x4 + 4*i8x8;
|
||||||
if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
|
if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -963,7 +973,7 @@ decode_intra_mb:
|
|||||||
for(i8x8=0; i8x8<4; i8x8++){
|
for(i8x8=0; i8x8<4; i8x8++){
|
||||||
if(cbp & (1<<i8x8)){
|
if(cbp & (1<<i8x8)){
|
||||||
if(IS_8x8DCT(mb_type)){
|
if(IS_8x8DCT(mb_type)){
|
||||||
DCTELEM *buf = &h->mb[64*i8x8];
|
DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
|
||||||
uint8_t *nnz;
|
uint8_t *nnz;
|
||||||
for(i4x4=0; i4x4<4; i4x4++){
|
for(i4x4=0; i4x4<4; i4x4++){
|
||||||
if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
|
if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
|
||||||
@ -976,7 +986,7 @@ decode_intra_mb:
|
|||||||
for(i4x4=0; i4x4<4; i4x4++){
|
for(i4x4=0; i4x4<4; i4x4++){
|
||||||
const int index= i4x4 + 4*i8x8;
|
const int index= i4x4 + 4*i8x8;
|
||||||
|
|
||||||
if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
|
if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -990,7 +1000,7 @@ decode_intra_mb:
|
|||||||
|
|
||||||
if(cbp&0x30){
|
if(cbp&0x30){
|
||||||
for(chroma_idx=0; chroma_idx<2; chroma_idx++)
|
for(chroma_idx=0; chroma_idx<2; chroma_idx++)
|
||||||
if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
|
if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1000,7 +1010,7 @@ decode_intra_mb:
|
|||||||
const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
|
const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
|
||||||
for(i4x4=0; i4x4<4; i4x4++){
|
for(i4x4=0; i4x4<4; i4x4++){
|
||||||
const int index= 16 + 4*chroma_idx + i4x4;
|
const int index= 16 + 4*chroma_idx + i4x4;
|
||||||
if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
|
if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -544,10 +544,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
|
|||||||
tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
|
tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
|
||||||
//{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
|
//{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
|
||||||
if( dir == 0 ) {
|
if( dir == 0 ) {
|
||||||
filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h );
|
filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
|
||||||
if( (edge&1) == 0 ) {
|
if( (edge&1) == 0 ) {
|
||||||
filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h);
|
filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
|
||||||
filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h);
|
filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
|
filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
|
||||||
|
@ -273,6 +273,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
|
|||||||
int unaligned;
|
int unaligned;
|
||||||
AVPicture picture;
|
AVPicture picture;
|
||||||
int stride_align[4];
|
int stride_align[4];
|
||||||
|
const int pixel_size = av_pix_fmt_descriptors[s->pix_fmt].comp[0].step_minus1+1;
|
||||||
|
|
||||||
avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
|
avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
|
||||||
|
|
||||||
@ -322,7 +323,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
|
|||||||
if((s->flags&CODEC_FLAG_EMU_EDGE) || !size[2])
|
if((s->flags&CODEC_FLAG_EMU_EDGE) || !size[2])
|
||||||
buf->data[i] = buf->base[i];
|
buf->data[i] = buf->base[i];
|
||||||
else
|
else
|
||||||
buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), stride_align[i]);
|
buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (pixel_size*EDGE_WIDTH>>h_shift), stride_align[i]);
|
||||||
}
|
}
|
||||||
if(size[1] && !size[2])
|
if(size[1] && !size[2])
|
||||||
ff_set_systematic_pal2((uint32_t*)buf->data[1], s->pix_fmt);
|
ff_set_systematic_pal2((uint32_t*)buf->data[1], s->pix_fmt);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user