vp9: add support for resolution changes in inter frames.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit e8b4f6d6be)
Signed-off-by: Andreas Cadhalpun <Andreas.Cadhalpun@googlemail.com>
			
			
This commit is contained in:
		 Ronald S. Bultje
					Ronald S. Bultje
				
			
				
					committed by
					
						 Andreas Cadhalpun
						Andreas Cadhalpun
					
				
			
			
				
	
			
			
			 Andreas Cadhalpun
						Andreas Cadhalpun
					
				
			
						parent
						
							48d388b033
						
					
				
				
					commit
					cf4b0fb4d6
				
			
							
								
								
									
										292
									
								
								libavcodec/vp9.c
									
									
									
									
									
								
							
							
						
						
									
										292
									
								
								libavcodec/vp9.c
									
									
									
									
									
								
							| @@ -240,7 +240,7 @@ typedef struct VP9Context { | |||||||
|     // whole-frame cache |     // whole-frame cache | ||||||
|     uint8_t *intra_pred_data[3]; |     uint8_t *intra_pred_data[3]; | ||||||
|     struct VP9Filter *lflvl; |     struct VP9Filter *lflvl; | ||||||
|     DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80]; |     DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135*144]; | ||||||
|  |  | ||||||
|     // block reconstruction intermediates |     // block reconstruction intermediates | ||||||
|     int block_alloc_using_2pass; |     int block_alloc_using_2pass; | ||||||
| @@ -249,6 +249,8 @@ typedef struct VP9Context { | |||||||
|     struct { int x, y; } min_mv, max_mv; |     struct { int x, y; } min_mv, max_mv; | ||||||
|     DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64]; |     DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64]; | ||||||
|     DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32]; |     DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32]; | ||||||
|  |     uint16_t mvscale[3][2]; | ||||||
|  |     uint8_t mvstep[3][2]; | ||||||
| } VP9Context; | } VP9Context; | ||||||
|  |  | ||||||
| static const uint8_t bwh_tab[2][N_BS_SIZES][2] = { | static const uint8_t bwh_tab[2][N_BS_SIZES][2] = { | ||||||
| @@ -583,6 +585,26 @@ static int decode_frame_header(AVCodecContext *ctx, | |||||||
|                     s->varcompref[1] = 2; |                     s->varcompref[1] = 2; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|  |             for (i = 0; i < 3; i++) { | ||||||
|  |                 AVFrame *ref = s->refs[s->refidx[i]].f; | ||||||
|  |                 int refw = ref->width, refh = ref->height; | ||||||
|  |  | ||||||
|  |                 if (refw == w && refh == h) { | ||||||
|  |                     s->mvscale[i][0] = s->mvscale[i][1] = 0; | ||||||
|  |                 } else { | ||||||
|  |                     if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) { | ||||||
|  |                         av_log(ctx, AV_LOG_ERROR, | ||||||
|  |                                "Invalid ref frame dimensions %dx%d for frame size %dx%d\n", | ||||||
|  |                                refw, refh, w, h); | ||||||
|  |                         return AVERROR_INVALIDDATA; | ||||||
|  |                     } | ||||||
|  |                     s->mvscale[i][0] = (refw << 14) / w; | ||||||
|  |                     s->mvscale[i][1] = (refh << 14) / h; | ||||||
|  |                     s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14; | ||||||
|  |                     s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     s->refreshctx   = s->errorres ? 0 : get_bits1(&s->gb); |     s->refreshctx   = s->errorres ? 0 : get_bits1(&s->gb); | ||||||
| @@ -2537,7 +2559,113 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off) | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2], | static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc, | ||||||
|  |                                             uint8_t *dst, ptrdiff_t dst_stride, | ||||||
|  |                                             const uint8_t *ref, ptrdiff_t ref_stride, | ||||||
|  |                                             ThreadFrame *ref_frame, | ||||||
|  |                                             ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, | ||||||
|  |                                             int bw, int bh, int w, int h, | ||||||
|  |                                             const uint16_t *scale, const uint8_t *step) | ||||||
|  | { | ||||||
|  | #define scale_mv(n, dim) (((int64_t)n * scale[dim]) >> 14) | ||||||
|  |     // BUG libvpx seems to scale the two components separately. This introduces | ||||||
|  |     // rounding errors but we have to reproduce them to be exactly compatible | ||||||
|  |     // with the output from libvpx... | ||||||
|  |     int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0); | ||||||
|  |     int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1); | ||||||
|  |     int refbw_m1, refbh_m1; | ||||||
|  |     int th; | ||||||
|  |  | ||||||
|  |     y = my >> 4; | ||||||
|  |     x = mx >> 4; | ||||||
|  |     ref += y * ref_stride + x; | ||||||
|  |     mx &= 15; | ||||||
|  |     my &= 15; | ||||||
|  |     refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; | ||||||
|  |     refbh_m1 = ((bh - 1) * step[1] + my) >> 4; | ||||||
|  |     // FIXME bilinear filter only needs 0/1 pixels, not 3/4 | ||||||
|  |     // we use +7 because the last 7 pixels of each sbrow can be changed in | ||||||
|  |     // the longest loopfilter of the next sbrow | ||||||
|  |     th = (y + refbh_m1 + 4 + 7) >> 6; | ||||||
|  |     ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); | ||||||
|  |     if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) { | ||||||
|  |         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, | ||||||
|  |                                  ref - 3 * ref_stride - 3, | ||||||
|  |                                  144, ref_stride, | ||||||
|  |                                  refbw_m1 + 8, refbh_m1 + 8, | ||||||
|  |                                  x - 3, y - 3, w, h); | ||||||
|  |         ref = s->edge_emu_buffer + 3 * 144 + 3; | ||||||
|  |         ref_stride = 144; | ||||||
|  |     } | ||||||
|  |     smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc, | ||||||
|  |                                               uint8_t *dst_u, uint8_t *dst_v, | ||||||
|  |                                               ptrdiff_t dst_stride, | ||||||
|  |                                               const uint8_t *ref_u, ptrdiff_t src_stride_u, | ||||||
|  |                                               const uint8_t *ref_v, ptrdiff_t src_stride_v, | ||||||
|  |                                               ThreadFrame *ref_frame, | ||||||
|  |                                               ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, | ||||||
|  |                                               int bw, int bh, int w, int h, | ||||||
|  |                                               const uint16_t *scale, const uint8_t *step) | ||||||
|  | { | ||||||
|  |     // BUG https://code.google.com/p/webm/issues/detail?id=820 | ||||||
|  |     int mx = scale_mv(mv->x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15); | ||||||
|  |     int my = scale_mv(mv->y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15); | ||||||
|  | #undef scale_mv | ||||||
|  |     int refbw_m1, refbh_m1; | ||||||
|  |     int th; | ||||||
|  |  | ||||||
|  |     y = my >> 4; | ||||||
|  |     x = mx >> 4; | ||||||
|  |     ref_u += y * src_stride_u + x; | ||||||
|  |     ref_v += y * src_stride_v + x; | ||||||
|  |     mx &= 15; | ||||||
|  |     my &= 15; | ||||||
|  |     refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; | ||||||
|  |     refbh_m1 = ((bh - 1) * step[1] + my) >> 4; | ||||||
|  |     // FIXME bilinear filter only needs 0/1 pixels, not 3/4 | ||||||
|  |     // we use +7 because the last 7 pixels of each sbrow can be changed in | ||||||
|  |     // the longest loopfilter of the next sbrow | ||||||
|  |     th = (y + refbh_m1 + 4 + 7) >> 5; | ||||||
|  |     ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); | ||||||
|  |     if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) { | ||||||
|  |         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, | ||||||
|  |                                  ref_u - 3 * src_stride_u - 3, | ||||||
|  |                                  144, src_stride_u, | ||||||
|  |                                  refbw_m1 + 8, refbh_m1 + 8, | ||||||
|  |                                  x - 3, y - 3, w, h); | ||||||
|  |         ref_u = s->edge_emu_buffer + 3 * 144 + 3; | ||||||
|  |         smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]); | ||||||
|  |  | ||||||
|  |         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, | ||||||
|  |                                  ref_v - 3 * src_stride_v - 3, | ||||||
|  |                                  144, src_stride_v, | ||||||
|  |                                  refbw_m1 + 8, refbh_m1 + 8, | ||||||
|  |                                  x - 3, y - 3, w, h); | ||||||
|  |         ref_v = s->edge_emu_buffer + 3 * 144 + 3; | ||||||
|  |         smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]); | ||||||
|  |     } else { | ||||||
|  |         smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]); | ||||||
|  |         smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #define FN(x) x##_scaled | ||||||
|  | #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \ | ||||||
|  |     mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \ | ||||||
|  |                    mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) | ||||||
|  | #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ | ||||||
|  |                       row, col, mv, bw, bh, w, h, i) \ | ||||||
|  |     mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ | ||||||
|  |                      row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) | ||||||
|  | #include "vp9_mc_template.c" | ||||||
|  | #undef mc_luma_dir | ||||||
|  | #undef mc_chroma_dir | ||||||
|  | #undef FN | ||||||
|  |  | ||||||
|  | static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2], | ||||||
|                                               uint8_t *dst, ptrdiff_t dst_stride, |                                               uint8_t *dst, ptrdiff_t dst_stride, | ||||||
|                                               const uint8_t *ref, ptrdiff_t ref_stride, |                                               const uint8_t *ref, ptrdiff_t ref_stride, | ||||||
|                                               ThreadFrame *ref_frame, |                                               ThreadFrame *ref_frame, | ||||||
| @@ -2569,7 +2697,7 @@ static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2], | |||||||
|     mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1); |     mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1); | ||||||
| } | } | ||||||
|  |  | ||||||
| static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2], | static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2], | ||||||
|                                                 uint8_t *dst_u, uint8_t *dst_v, |                                                 uint8_t *dst_u, uint8_t *dst_v, | ||||||
|                                                 ptrdiff_t dst_stride, |                                                 ptrdiff_t dst_stride, | ||||||
|                                                 const uint8_t *ref_u, ptrdiff_t src_stride_u, |                                                 const uint8_t *ref_u, ptrdiff_t src_stride_u, | ||||||
| @@ -2614,156 +2742,32 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2], | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #define FN(x) x /* unscaled instantiation: FN(inter_pred) expands to inter_pred() */ | ||||||
|  | #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \ | ||||||
|  |     mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \ | ||||||
|  |                      mv, bw, bh, w, h) | ||||||
|  | #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ | ||||||
|  |                       row, col, mv, bw, bh, w, h, i) \ | ||||||
|  |     mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ | ||||||
|  |                        row, col, mv, bw, bh, w, h) | ||||||
|  | #include "vp9_mc_template.c" | ||||||
|  | #undef mc_luma_dir | ||||||
|  | #undef mc_chroma_dir | ||||||
|  | #undef FN | ||||||
|  |  | ||||||
| static void inter_recon(AVCodecContext *ctx) | static void inter_recon(AVCodecContext *ctx) | ||||||
| { | { | ||||||
|     static const uint8_t bwlog_tab[2][N_BS_SIZES] = { |  | ||||||
|         { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 }, |  | ||||||
|         { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 }, |  | ||||||
|     }; |  | ||||||
|     VP9Context *s = ctx->priv_data; |     VP9Context *s = ctx->priv_data; | ||||||
|     VP9Block *b = s->b; |     VP9Block *b = s->b; | ||||||
|     int row = s->row, col = s->col; |     int row = s->row, col = s->col; | ||||||
|     ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2; |  | ||||||
|     AVFrame *ref1 = tref1->f, *ref2; |  | ||||||
|     int w1 = ref1->width, h1 = ref1->height, w2, h2; |  | ||||||
|     ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride; |  | ||||||
|  |  | ||||||
|     if (b->comp) { |     if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) { | ||||||
|         tref2 = &s->refs[s->refidx[b->ref[1]]]; |         inter_pred_scaled(ctx); | ||||||
|         ref2 = tref2->f; |  | ||||||
|         w2 = ref2->width; |  | ||||||
|         h2 = ref2->height; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // y inter pred |  | ||||||
|     if (b->bs > BS_8x8) { |  | ||||||
|         if (b->bs == BS_8x4) { |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y, |  | ||||||
|                         ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                         row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1); |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[3][b->filter][0], |  | ||||||
|                         s->dst[0] + 4 * ls_y, ls_y, |  | ||||||
|                         ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                         (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1); |  | ||||||
|  |  | ||||||
|             if (b->comp) { |  | ||||||
|                 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y, |  | ||||||
|                             ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                             row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2); |  | ||||||
|                 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], |  | ||||||
|                             s->dst[0] + 4 * ls_y, ls_y, |  | ||||||
|                             ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                             (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2); |  | ||||||
|             } |  | ||||||
|         } else if (b->bs == BS_4x8) { |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y, |  | ||||||
|                         ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                         row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1); |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y, |  | ||||||
|                         ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                         row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1); |  | ||||||
|  |  | ||||||
|             if (b->comp) { |  | ||||||
|                 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y, |  | ||||||
|                             ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                             row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2); |  | ||||||
|                 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y, |  | ||||||
|                             ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                             row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2); |  | ||||||
|             } |  | ||||||
|     } else { |     } else { | ||||||
|             av_assert2(b->bs == BS_4x4); |         inter_pred(ctx); | ||||||
|  |  | ||||||
|             // FIXME if two horizontally adjacent blocks have the same MV, |  | ||||||
|             // do a w8 instead of a w4 call |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y, |  | ||||||
|                         ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                         row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1); |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y, |  | ||||||
|                         ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                         row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1); |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[4][b->filter][0], |  | ||||||
|                         s->dst[0] + 4 * ls_y, ls_y, |  | ||||||
|                         ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                         (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1); |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[4][b->filter][0], |  | ||||||
|                         s->dst[0] + 4 * ls_y + 4, ls_y, |  | ||||||
|                         ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                         (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1); |  | ||||||
|  |  | ||||||
|             if (b->comp) { |  | ||||||
|                 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y, |  | ||||||
|                             ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                             row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2); |  | ||||||
|                 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y, |  | ||||||
|                             ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                             row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2); |  | ||||||
|                 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], |  | ||||||
|                             s->dst[0] + 4 * ls_y, ls_y, |  | ||||||
|                             ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                             (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2); |  | ||||||
|                 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], |  | ||||||
|                             s->dst[0] + 4 * ls_y + 4, ls_y, |  | ||||||
|                             ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                             (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2); |  | ||||||
|     } |     } | ||||||
|         } |  | ||||||
|     } else { |  | ||||||
|         int bwl = bwlog_tab[0][b->bs]; |  | ||||||
|         int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4; |  | ||||||
|  |  | ||||||
|         mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y, |  | ||||||
|                     ref1->data[0], ref1->linesize[0], tref1, |  | ||||||
|                     row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1); |  | ||||||
|  |  | ||||||
|         if (b->comp) |  | ||||||
|             mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y, |  | ||||||
|                         ref2->data[0], ref2->linesize[0], tref2, |  | ||||||
|                         row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // uv inter pred |  | ||||||
|     { |  | ||||||
|         int bwl = bwlog_tab[1][b->bs]; |  | ||||||
|         int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4; |  | ||||||
|         VP56mv mvuv; |  | ||||||
|  |  | ||||||
|         w1 = (w1 + 1) >> 1; |  | ||||||
|         h1 = (h1 + 1) >> 1; |  | ||||||
|         if (b->comp) { |  | ||||||
|             w2 = (w2 + 1) >> 1; |  | ||||||
|             h2 = (h2 + 1) >> 1; |  | ||||||
|         } |  | ||||||
|         if (b->bs > BS_8x8) { |  | ||||||
|             mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4); |  | ||||||
|             mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4); |  | ||||||
|         } else { |  | ||||||
|             mvuv = b->mv[0][0]; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0], |  | ||||||
|                       s->dst[1], s->dst[2], ls_uv, |  | ||||||
|                       ref1->data[1], ref1->linesize[1], |  | ||||||
|                       ref1->data[2], ref1->linesize[2], tref1, |  | ||||||
|                       row << 2, col << 2, &mvuv, bw, bh, w1, h1); |  | ||||||
|  |  | ||||||
|         if (b->comp) { |  | ||||||
|             if (b->bs > BS_8x8) { |  | ||||||
|                 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4); |  | ||||||
|                 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4); |  | ||||||
|             } else { |  | ||||||
|                 mvuv = b->mv[0][1]; |  | ||||||
|             } |  | ||||||
|             mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1], |  | ||||||
|                           s->dst[1], s->dst[2], ls_uv, |  | ||||||
|                           ref2->data[1], ref2->linesize[1], |  | ||||||
|                           ref2->data[2], ref2->linesize[2], tref2, |  | ||||||
|                           row << 2, col << 2, &mvuv, bw, bh, w2, h2); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (!b->skip) { |     if (!b->skip) { | ||||||
|         /* mostly copied intra_reconn() */ |         /* mostly copied intra_recon() */ | ||||||
|  |  | ||||||
|         int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n; |         int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n; | ||||||
|         int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2); |         int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2); | ||||||
|   | |||||||
							
								
								
									
										171
									
								
								libavcodec/vp9_mc_template.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										171
									
								
								libavcodec/vp9_mc_template.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,171 @@ | |||||||
|  | /* | ||||||
|  |  * VP9 compatible video decoder | ||||||
|  |  * | ||||||
|  |  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> | ||||||
|  |  * Copyright (C) 2013 Clément Bœsch <u pkh me> | ||||||
|  |  * | ||||||
|  |  * This file is part of FFmpeg. | ||||||
|  |  * | ||||||
|  |  * FFmpeg is free software; you can redistribute it and/or | ||||||
|  |  * modify it under the terms of the GNU Lesser General Public | ||||||
|  |  * License as published by the Free Software Foundation; either | ||||||
|  |  * version 2.1 of the License, or (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * FFmpeg is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||||
|  |  * Lesser General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU Lesser General Public | ||||||
|  |  * License along with FFmpeg; if not, write to the Free Software | ||||||
|  |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | static void FN(inter_pred)(AVCodecContext *ctx) | ||||||
|  | { | ||||||
|  |     static const uint8_t bwlog_tab[2][N_BS_SIZES] = { | ||||||
|  |         { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 }, | ||||||
|  |         { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 }, | ||||||
|  |     }; | ||||||
|  |     VP9Context *s = ctx->priv_data; | ||||||
|  |     VP9Block *b = s->b; | ||||||
|  |     int row = s->row, col = s->col; | ||||||
|  |     ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2; | ||||||
|  |     AVFrame *ref1 = tref1->f, *ref2; | ||||||
|  |     int w1 = ref1->width, h1 = ref1->height, w2, h2; | ||||||
|  |     ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride; | ||||||
|  |  | ||||||
|  |     if (b->comp) { | ||||||
|  |         tref2 = &s->refs[s->refidx[b->ref[1]]]; | ||||||
|  |         ref2 = tref2->f; | ||||||
|  |         w2 = ref2->width; | ||||||
|  |         h2 = ref2->height; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // y inter pred | ||||||
|  |     if (b->bs > BS_8x8) { | ||||||
|  |         if (b->bs == BS_8x4) { | ||||||
|  |             mc_luma_dir(s, mc[3][b->filter][0], s->dst[0], ls_y, | ||||||
|  |                         ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                         row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1, 0); | ||||||
|  |             mc_luma_dir(s, mc[3][b->filter][0], | ||||||
|  |                         s->dst[0] + 4 * ls_y, ls_y, | ||||||
|  |                         ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                         (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1, 0); | ||||||
|  |  | ||||||
|  |             if (b->comp) { | ||||||
|  |                 mc_luma_dir(s, mc[3][b->filter][1], s->dst[0], ls_y, | ||||||
|  |                             ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                             row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2, 1); | ||||||
|  |                 mc_luma_dir(s, mc[3][b->filter][1], | ||||||
|  |                             s->dst[0] + 4 * ls_y, ls_y, | ||||||
|  |                             ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                             (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2, 1); | ||||||
|  |             } | ||||||
|  |         } else if (b->bs == BS_4x8) { | ||||||
|  |             mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, | ||||||
|  |                         ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                         row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1, 0); | ||||||
|  |             mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y, | ||||||
|  |                         ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                         row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0); | ||||||
|  |  | ||||||
|  |             if (b->comp) { | ||||||
|  |                 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, | ||||||
|  |                             ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                             row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2, 1); | ||||||
|  |                 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y, | ||||||
|  |                             ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                             row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1); | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             av_assert2(b->bs == BS_4x4); | ||||||
|  |  | ||||||
|  |             // FIXME if two horizontally adjacent blocks have the same MV, | ||||||
|  |             // do a w8 instead of a w4 call | ||||||
|  |             mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y, | ||||||
|  |                         ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                         row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1, 0); | ||||||
|  |             mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y, | ||||||
|  |                         ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                         row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1, 0); | ||||||
|  |             mc_luma_dir(s, mc[4][b->filter][0], | ||||||
|  |                         s->dst[0] + 4 * ls_y, ls_y, | ||||||
|  |                         ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                         (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1, 0); | ||||||
|  |             mc_luma_dir(s, mc[4][b->filter][0], | ||||||
|  |                         s->dst[0] + 4 * ls_y + 4, ls_y, | ||||||
|  |                         ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                         (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0); | ||||||
|  |  | ||||||
|  |             if (b->comp) { | ||||||
|  |                 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y, | ||||||
|  |                             ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                             row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2, 1); | ||||||
|  |                 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y, | ||||||
|  |                             ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                             row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2, 1); | ||||||
|  |                 mc_luma_dir(s, mc[4][b->filter][1], | ||||||
|  |                             s->dst[0] + 4 * ls_y, ls_y, | ||||||
|  |                             ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                             (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2, 1); | ||||||
|  |                 mc_luma_dir(s, mc[4][b->filter][1], | ||||||
|  |                             s->dst[0] + 4 * ls_y + 4, ls_y, | ||||||
|  |                             ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                             (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         int bwl = bwlog_tab[0][b->bs]; | ||||||
|  |         int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4; | ||||||
|  |  | ||||||
|  |         mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y, | ||||||
|  |                     ref1->data[0], ref1->linesize[0], tref1, | ||||||
|  |                     row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1, 0); | ||||||
|  |  | ||||||
|  |         if (b->comp) | ||||||
|  |             mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y, | ||||||
|  |                         ref2->data[0], ref2->linesize[0], tref2, | ||||||
|  |                         row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2, 1); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // uv inter pred | ||||||
|  |     { | ||||||
|  |         int bwl = bwlog_tab[1][b->bs]; | ||||||
|  |         int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4; | ||||||
|  |         VP56mv mvuv; | ||||||
|  |  | ||||||
|  |         w1 = (w1 + 1) >> 1; | ||||||
|  |         h1 = (h1 + 1) >> 1; | ||||||
|  |         if (b->comp) { | ||||||
|  |             w2 = (w2 + 1) >> 1; | ||||||
|  |             h2 = (h2 + 1) >> 1; | ||||||
|  |         } | ||||||
|  |         if (b->bs > BS_8x8) { | ||||||
|  |             mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4); | ||||||
|  |             mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4); | ||||||
|  |         } else { | ||||||
|  |             mvuv = b->mv[0][0]; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         mc_chroma_dir(s, mc[bwl][b->filter][0], | ||||||
|  |                       s->dst[1], s->dst[2], ls_uv, | ||||||
|  |                       ref1->data[1], ref1->linesize[1], | ||||||
|  |                       ref1->data[2], ref1->linesize[2], tref1, | ||||||
|  |                       row << 2, col << 2, &mvuv, bw, bh, w1, h1, 0); | ||||||
|  |  | ||||||
|  |         if (b->comp) { | ||||||
|  |             if (b->bs > BS_8x8) { | ||||||
|  |                 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4); | ||||||
|  |                 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4); | ||||||
|  |             } else { | ||||||
|  |                 mvuv = b->mv[0][1]; | ||||||
|  |             } | ||||||
|  |             mc_chroma_dir(s, mc[bwl][b->filter][1], | ||||||
|  |                           s->dst[1], s->dst[2], ls_uv, | ||||||
|  |                           ref2->data[1], ref2->linesize[1], | ||||||
|  |                           ref2->data[2], ref2->linesize[2], tref2, | ||||||
|  |                           row << 2, col << 2, &mvuv, bw, bh, w2, h2, 1); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -1,5 +1,8 @@ | |||||||
| /* | /* | ||||||
|  * Copyright (C) 2008 Michael Niedermayer |  * VP9 compatible video decoder | ||||||
|  |  * | ||||||
|  |  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> | ||||||
|  |  * Copyright (C) 2013 Clément Bœsch <u pkh me> | ||||||
|  * |  * | ||||||
|  * This file is part of FFmpeg. |  * This file is part of FFmpeg. | ||||||
|  * |  * | ||||||
|   | |||||||
| @@ -1707,8 +1707,9 @@ copy_avg_fn(4) | |||||||
| #undef fpel_fn | #undef fpel_fn | ||||||
| #undef copy_avg_fn | #undef copy_avg_fn | ||||||
|  |  | ||||||
| static const int8_t vp9_subpel_filters[3][15][8] = { | static const int16_t vp9_subpel_filters[3][16][8] = { | ||||||
|     [FILTER_8TAP_REGULAR] = { |     [FILTER_8TAP_REGULAR] = { | ||||||
|  |         {  0,  0,   0, 128,   0,   0,  0,  0 }, | ||||||
|         {  0,  1,  -5, 126,   8,  -3,  1,  0 }, |         {  0,  1,  -5, 126,   8,  -3,  1,  0 }, | ||||||
|         { -1,  3, -10, 122,  18,  -6,  2,  0 }, |         { -1,  3, -10, 122,  18,  -6,  2,  0 }, | ||||||
|         { -1,  4, -13, 118,  27,  -9,  3, -1 }, |         { -1,  4, -13, 118,  27,  -9,  3, -1 }, | ||||||
| @@ -1725,6 +1726,7 @@ static const int8_t vp9_subpel_filters[3][15][8] = { | |||||||
|         {  0,  2,  -6,  18, 122, -10,  3, -1 }, |         {  0,  2,  -6,  18, 122, -10,  3, -1 }, | ||||||
|         {  0,  1,  -3,   8, 126,  -5,  1,  0 }, |         {  0,  1,  -3,   8, 126,  -5,  1,  0 }, | ||||||
|     }, [FILTER_8TAP_SHARP] = { |     }, [FILTER_8TAP_SHARP] = { | ||||||
|  |         {  0,  0,   0, 128,   0,   0,  0,  0 }, | ||||||
|         { -1,  3,  -7, 127,   8,  -3,  1,  0 }, |         { -1,  3,  -7, 127,   8,  -3,  1,  0 }, | ||||||
|         { -2,  5, -13, 125,  17,  -6,  3, -1 }, |         { -2,  5, -13, 125,  17,  -6,  3, -1 }, | ||||||
|         { -3,  7, -17, 121,  27, -10,  5, -2 }, |         { -3,  7, -17, 121,  27, -10,  5, -2 }, | ||||||
| @@ -1741,6 +1743,7 @@ static const int8_t vp9_subpel_filters[3][15][8] = { | |||||||
|         { -1,  3,  -6,  17, 125, -13,  5, -2 }, |         { -1,  3,  -6,  17, 125, -13,  5, -2 }, | ||||||
|         {  0,  1,  -3,   8, 127,  -7,  3, -1 }, |         {  0,  1,  -3,   8, 127,  -7,  3, -1 }, | ||||||
|     }, [FILTER_8TAP_SMOOTH] = { |     }, [FILTER_8TAP_SMOOTH] = { | ||||||
|  |         {  0,  0,   0, 128,   0,   0,  0,  0 }, | ||||||
|         { -3, -1,  32,  64,  38,   1, -3,  0 }, |         { -3, -1,  32,  64,  38,   1, -3,  0 }, | ||||||
|         { -2, -2,  29,  63,  41,   2, -3,  0 }, |         { -2, -2,  29,  63,  41,   2, -3,  0 }, | ||||||
|         { -2, -2,  26,  63,  43,   4, -4,  0 }, |         { -2, -2,  26,  63,  43,   4, -4,  0 }, | ||||||
| @@ -1772,7 +1775,7 @@ static const int8_t vp9_subpel_filters[3][15][8] = { | |||||||
| static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride, | static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride, | ||||||
|                                           const uint8_t *src, ptrdiff_t src_stride, |                                           const uint8_t *src, ptrdiff_t src_stride, | ||||||
|                                           int w, int h, ptrdiff_t ds, |                                           int w, int h, ptrdiff_t ds, | ||||||
|                                           const int8_t *filter, int avg) |                                           const int16_t *filter, int avg) | ||||||
| { | { | ||||||
|     do { |     do { | ||||||
|         int x; |         int x; | ||||||
| @@ -1792,7 +1795,7 @@ static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride, | |||||||
| #define filter_8tap_1d_fn(opn, opa, dir, ds) \ | #define filter_8tap_1d_fn(opn, opa, dir, ds) \ | ||||||
| static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ | static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ | ||||||
|                                                 const uint8_t *src, ptrdiff_t src_stride, \ |                                                 const uint8_t *src, ptrdiff_t src_stride, \ | ||||||
|                                                 int w, int h, const int8_t *filter) \ |                                                 int w, int h, const int16_t *filter) \ | ||||||
| { \ | { \ | ||||||
|     do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \ |     do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \ | ||||||
| } | } | ||||||
| @@ -1806,8 +1809,8 @@ filter_8tap_1d_fn(avg, 1, h, 1) | |||||||
|  |  | ||||||
| static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride, | static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride, | ||||||
|                                           const uint8_t *src, ptrdiff_t src_stride, |                                           const uint8_t *src, ptrdiff_t src_stride, | ||||||
|                                           int w, int h, const int8_t *filterx, |                                           int w, int h, const int16_t *filterx, | ||||||
|                                           const int8_t *filtery, int avg) |                                           const int16_t *filtery, int avg) | ||||||
| { | { | ||||||
|     int tmp_h = h + 7; |     int tmp_h = h + 7; | ||||||
|     uint8_t tmp[64 * 71], *tmp_ptr = tmp; |     uint8_t tmp[64 * 71], *tmp_ptr = tmp; | ||||||
| @@ -1842,8 +1845,8 @@ static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride, | |||||||
| #define filter_8tap_2d_fn(opn, opa) \ | #define filter_8tap_2d_fn(opn, opa) \ | ||||||
| static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ | static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ | ||||||
|                                            const uint8_t *src, ptrdiff_t src_stride, \ |                                            const uint8_t *src, ptrdiff_t src_stride, \ | ||||||
|                                            int w, int h, const int8_t *filterx, \ |                                            int w, int h, const int16_t *filterx, \ | ||||||
|                                            const int8_t *filtery) \ |                                            const int16_t *filtery) \ | ||||||
| { \ | { \ | ||||||
|     do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \ |     do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \ | ||||||
| } | } | ||||||
| @@ -1853,15 +1856,13 @@ filter_8tap_2d_fn(avg, 1) | |||||||
|  |  | ||||||
| #undef filter_8tap_2d_fn | #undef filter_8tap_2d_fn | ||||||
|  |  | ||||||
| #undef FILTER_8TAP |  | ||||||
|  |  | ||||||
| #define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \ | #define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \ | ||||||
| static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ | static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ | ||||||
|                                               const uint8_t *src, ptrdiff_t src_stride, \ |                                               const uint8_t *src, ptrdiff_t src_stride, \ | ||||||
|                                               int h, int mx, int my) \ |                                               int h, int mx, int my) \ | ||||||
| { \ | { \ | ||||||
|     avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \ |     avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \ | ||||||
|                             vp9_subpel_filters[type_idx][dir_m - 1]); \ |                             vp9_subpel_filters[type_idx][dir_m]); \ | ||||||
| } | } | ||||||
|  |  | ||||||
| #define filter_fn_2d(sz, type, type_idx, avg) \ | #define filter_fn_2d(sz, type, type_idx, avg) \ | ||||||
| @@ -1870,8 +1871,8 @@ static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ | |||||||
|                                            int h, int mx, int my) \ |                                            int h, int mx, int my) \ | ||||||
| { \ | { \ | ||||||
|     avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \ |     avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \ | ||||||
|                        vp9_subpel_filters[type_idx][mx - 1], \ |                        vp9_subpel_filters[type_idx][mx], \ | ||||||
|                        vp9_subpel_filters[type_idx][my - 1]); \ |                        vp9_subpel_filters[type_idx][my]); \ | ||||||
| } | } | ||||||
|  |  | ||||||
| #define FILTER_BILIN(src, x, mxy, stride) \ | #define FILTER_BILIN(src, x, mxy, stride) \ | ||||||
| @@ -1957,8 +1958,6 @@ bilin_2d_fn(avg, 1) | |||||||
|  |  | ||||||
| #undef bilin_2d_fn | #undef bilin_2d_fn | ||||||
|  |  | ||||||
| #undef FILTER_BILIN |  | ||||||
|  |  | ||||||
| #define bilinf_fn_1d(sz, dir, dir_m, avg) \ | #define bilinf_fn_1d(sz, dir, dir_m, avg) \ | ||||||
| static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ | static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ | ||||||
|                                       const uint8_t *src, ptrdiff_t src_stride, \ |                                       const uint8_t *src, ptrdiff_t src_stride, \ | ||||||
| @@ -2053,12 +2052,190 @@ static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp) | |||||||
| #undef init_subpel3 | #undef init_subpel3 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static av_always_inline void do_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, | ||||||
|  |                                               const uint8_t *src, ptrdiff_t src_stride, | ||||||
|  |                                               int w, int h, int mx, int my, | ||||||
|  |                                               int dx, int dy, int avg, | ||||||
|  |                                               const int16_t (*filters)[8]) | ||||||
|  | { | ||||||
|  |     int tmp_h = (((h - 1) * dy + my) >> 4) + 8; | ||||||
|  |     uint8_t tmp[64 * 135], *tmp_ptr = tmp; | ||||||
|  |  | ||||||
|  |     src -= src_stride * 3; | ||||||
|  |     do { | ||||||
|  |         int x; | ||||||
|  |         int imx = mx, ioff = 0; | ||||||
|  |  | ||||||
|  |         for (x = 0; x < w; x++) { | ||||||
|  |             tmp_ptr[x] = FILTER_8TAP(src, ioff, filters[imx], 1); | ||||||
|  |             imx += dx; | ||||||
|  |             ioff += imx >> 4; | ||||||
|  |             imx &= 0xf; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         tmp_ptr += 64; | ||||||
|  |         src += src_stride; | ||||||
|  |     } while (--tmp_h); | ||||||
|  |  | ||||||
|  |     tmp_ptr = tmp + 64 * 3; | ||||||
|  |     do { | ||||||
|  |         int x; | ||||||
|  |         const int16_t *filter = filters[my]; | ||||||
|  |  | ||||||
|  |         for (x = 0; x < w; x++) | ||||||
|  |             if (avg) { | ||||||
|  |                 dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filter, 64) + 1) >> 1; | ||||||
|  |             } else { | ||||||
|  |                 dst[x] = FILTER_8TAP(tmp_ptr, x, filter, 64); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |         my += dy; | ||||||
|  |         tmp_ptr += (my >> 4) * 64; | ||||||
|  |         my &= 0xf; | ||||||
|  |         dst += dst_stride; | ||||||
|  |     } while (--h); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #define scaled_filter_8tap_fn(opn, opa) \ | ||||||
|  | static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \ | ||||||
|  |                                             const uint8_t *src, ptrdiff_t src_stride, \ | ||||||
|  |                                             int w, int h, int mx, int my, int dx, int dy, \ | ||||||
|  |                                             const int16_t (*filters)[8]) \ | ||||||
|  | { \ | ||||||
|  |     do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | ||||||
|  |                      opa, filters); \ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | scaled_filter_8tap_fn(put, 0) | ||||||
|  | scaled_filter_8tap_fn(avg, 1) | ||||||
|  |  | ||||||
|  | #undef scaled_filter_8tap_fn | ||||||
|  |  | ||||||
|  | #undef FILTER_8TAP | ||||||
|  |  | ||||||
/*
 * Emits <op>_scaled_<type>_<sz>_c(): a fixed-width entry point that forwards
 * to <op>_scaled_8tap_c() with w = sz and the vp9_subpel_filters[type_idx]
 * coefficient bank.
 */
#define scaled_filter_fn(sz, type, type_idx, op) \
static void op##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                          const uint8_t *src, ptrdiff_t src_stride, \
                                          int h, int mx, int my, int dx, int dy) \
{ \
    op##_scaled_8tap_c(dst, dst_stride, src, src_stride, \
                       sz, h, mx, my, dx, dy, \
                       vp9_subpel_filters[type_idx]); \
}
|  |  | ||||||
|  | static av_always_inline void do_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, | ||||||
|  |                                                const uint8_t *src, ptrdiff_t src_stride, | ||||||
|  |                                                int w, int h, int mx, int my, | ||||||
|  |                                                int dx, int dy, int avg) | ||||||
|  | { | ||||||
|  |     uint8_t tmp[64 * 129], *tmp_ptr = tmp; | ||||||
|  |     int tmp_h = (((h - 1) * dy + my) >> 4) + 2; | ||||||
|  |  | ||||||
|  |     do { | ||||||
|  |         int x; | ||||||
|  |         int imx = mx, ioff = 0; | ||||||
|  |  | ||||||
|  |         for (x = 0; x < w; x++) { | ||||||
|  |             tmp_ptr[x] = FILTER_BILIN(src, ioff, imx, 1); | ||||||
|  |             imx += dx; | ||||||
|  |             ioff += imx >> 4; | ||||||
|  |             imx &= 0xf; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         tmp_ptr += 64; | ||||||
|  |         src += src_stride; | ||||||
|  |     } while (--tmp_h); | ||||||
|  |  | ||||||
|  |     tmp_ptr = tmp; | ||||||
|  |     do { | ||||||
|  |         int x; | ||||||
|  |  | ||||||
|  |         for (x = 0; x < w; x++) | ||||||
|  |             if (avg) { | ||||||
|  |                 dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1; | ||||||
|  |             } else { | ||||||
|  |                 dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |         my += dy; | ||||||
|  |         tmp_ptr += (my >> 4) * 64; | ||||||
|  |         my &= 0xf; | ||||||
|  |         dst += dst_stride; | ||||||
|  |     } while (--h); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #define scaled_bilin_fn(opn, opa) \ | ||||||
|  | static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \ | ||||||
|  |                                              const uint8_t *src, ptrdiff_t src_stride, \ | ||||||
|  |                                              int w, int h, int mx, int my, int dx, int dy) \ | ||||||
|  | { \ | ||||||
|  |     do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | scaled_bilin_fn(put, 0) | ||||||
|  | scaled_bilin_fn(avg, 1) | ||||||
|  |  | ||||||
|  | #undef scaled_bilin_fn | ||||||
|  |  | ||||||
|  | #undef FILTER_BILIN | ||||||
|  |  | ||||||
/*
 * Emits <op>_scaled_bilin_<sz>_c(): a fixed-width entry point that forwards
 * to <op>_scaled_bilin_c() with w = sz.
 */
#define scaled_bilinf_fn(sz, op) \
static void op##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                       const uint8_t *src, ptrdiff_t src_stride, \
                                       int h, int mx, int my, int dx, int dy) \
{ \
    op##_scaled_bilin_c(dst, dst_stride, src, src_stride, \
                        sz, h, mx, my, dx, dy); \
}
|  |  | ||||||
|  | #define scaled_filter_fns(sz, avg) \ | ||||||
|  | scaled_filter_fn(sz,        regular, FILTER_8TAP_REGULAR, avg) \ | ||||||
|  | scaled_filter_fn(sz,        smooth,  FILTER_8TAP_SMOOTH,  avg) \ | ||||||
|  | scaled_filter_fn(sz,        sharp,   FILTER_8TAP_SHARP,   avg) \ | ||||||
|  | scaled_bilinf_fn(sz,                                      avg) | ||||||
|  |  | ||||||
|  | #define scaled_filter_fn_set(avg) \ | ||||||
|  | scaled_filter_fns(64, avg) \ | ||||||
|  | scaled_filter_fns(32, avg) \ | ||||||
|  | scaled_filter_fns(16, avg) \ | ||||||
|  | scaled_filter_fns(8,  avg) \ | ||||||
|  | scaled_filter_fns(4,  avg) | ||||||
|  |  | ||||||
|  | scaled_filter_fn_set(put) | ||||||
|  | scaled_filter_fn_set(avg) | ||||||
|  |  | ||||||
|  | #undef scaled_filter_fns | ||||||
|  | #undef scaled_filter_fn_set | ||||||
|  | #undef scaled_filter_fn | ||||||
|  | #undef scaled_bilinf_fn | ||||||
|  |  | ||||||
|  | static av_cold void vp9dsp_scaled_mc_init(VP9DSPContext *dsp) | ||||||
|  | { | ||||||
|  | #define init_scaled(idx1, idx2, sz, type) \ | ||||||
|  |     dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \ | ||||||
|  |     dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \ | ||||||
|  |     dsp->smc[idx1][FILTER_8TAP_SHARP  ][idx2] = type##_scaled_sharp_##sz##_c; \ | ||||||
|  |     dsp->smc[idx1][FILTER_BILINEAR    ][idx2] = type##_scaled_bilin_##sz##_c | ||||||
|  |  | ||||||
|  | #define init_scaled_put_avg(idx, sz) \ | ||||||
|  |     init_scaled(idx, 0, sz, put); \ | ||||||
|  |     init_scaled(idx, 1, sz, avg) | ||||||
|  |  | ||||||
|  |     init_scaled_put_avg(0, 64); | ||||||
|  |     init_scaled_put_avg(1, 32); | ||||||
|  |     init_scaled_put_avg(2, 16); | ||||||
|  |     init_scaled_put_avg(3,  8); | ||||||
|  |     init_scaled_put_avg(4,  4); | ||||||
|  |  | ||||||
|  | #undef init_scaled_put_avg | ||||||
|  | #undef init_scaled | ||||||
|  | } | ||||||
|  |  | ||||||
| av_cold void ff_vp9dsp_init(VP9DSPContext *dsp) | av_cold void ff_vp9dsp_init(VP9DSPContext *dsp) | ||||||
| { | { | ||||||
|     vp9dsp_intrapred_init(dsp); |     vp9dsp_intrapred_init(dsp); | ||||||
|     vp9dsp_itxfm_init(dsp); |     vp9dsp_itxfm_init(dsp); | ||||||
|     vp9dsp_loopfilter_init(dsp); |     vp9dsp_loopfilter_init(dsp); | ||||||
|     vp9dsp_mc_init(dsp); |     vp9dsp_mc_init(dsp); | ||||||
|  |     vp9dsp_scaled_mc_init(dsp); | ||||||
|  |  | ||||||
|     if (ARCH_X86) ff_vp9dsp_init_x86(dsp); |     if (ARCH_X86) ff_vp9dsp_init_x86(dsp); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -32,6 +32,9 @@ | |||||||
| typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, | typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, | ||||||
|                             const uint8_t *ref, ptrdiff_t ref_stride, |                             const uint8_t *ref, ptrdiff_t ref_stride, | ||||||
|                             int h, int mx, int my); |                             int h, int mx, int my); | ||||||
|  | typedef void (*vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, | ||||||
|  |                                    const uint8_t *ref, ptrdiff_t ref_stride, | ||||||
|  |                                    int h, int mx, int my, int dx, int dy); | ||||||
|  |  | ||||||
| typedef struct VP9DSPContext { | typedef struct VP9DSPContext { | ||||||
|     /* |     /* | ||||||
| @@ -109,6 +112,12 @@ typedef struct VP9DSPContext { | |||||||
|      * dst/stride are aligned by hsize |      * dst/stride are aligned by hsize | ||||||
|      */ |      */ | ||||||
|     vp9_mc_func mc[5][4][2][2][2]; |     vp9_mc_func mc[5][4][2][2][2]; | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * for scalable MC, first 3 dimensions identical to above, the other two | ||||||
|  |      * don't exist since it changes per stepsize. | ||||||
|  |      */ | ||||||
|  |     vp9_scaled_mc_func smc[5][4][2]; | ||||||
| } VP9DSPContext; | } VP9DSPContext; | ||||||
|  |  | ||||||
| void ff_vp9dsp_init(VP9DSPContext *dsp); | void ff_vp9dsp_init(VP9DSPContext *dsp); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user