diff --git a/configure b/configure index cfdf53d5d..d1062e0d4 100755 --- a/configure +++ b/configure @@ -41,7 +41,6 @@ Advanced options: ${toggle_vp10} VP10 codec support ${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders) ${toggle_postproc} postprocessing - ${toggle_vp9_postproc} vp9 specific postprocessing ${toggle_multithread} multithreaded encoding and decoding ${toggle_spatial_resampling} spatial sampling (scaling) support ${toggle_realtime_only} enable this option while building for real-time encoding @@ -283,7 +282,6 @@ CONFIG_LIST=" dc_recon runtime_cpu_detect postproc - vp9_postproc multithread internal_stats ${CODECS} @@ -346,7 +344,6 @@ CMDLINE_SELECT=" dequant_tokens dc_recon postproc - vp9_postproc multithread internal_stats ${CODECS} @@ -442,7 +439,7 @@ process_targets() { done enabled debug_libs && DIST_DIR="${DIST_DIR}-debug" enabled codec_srcs && DIST_DIR="${DIST_DIR}-src" - ! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost" + ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost" ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt" ! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs" DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}" @@ -626,10 +623,6 @@ process_toolchain() { enable_feature dc_recon fi - if enabled internal_stats; then - enable_feature vp9_postproc - fi - # Enable the postbuild target if building for visual studio. 
case "$tgt_cc" in vs*) enable_feature msvs diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c index 9ca86e5e5..5469a7aae 100644 --- a/vp10/common/alloccommon.c +++ b/vp10/common/alloccommon.c @@ -81,15 +81,6 @@ void vp10_free_ref_frame_buffers(BufferPool *pool) { } } -void vp10_free_postproc_buffers(VP10_COMMON *cm) { -#if CONFIG_VP9_POSTPROC - vpx_free_frame_buffer(&cm->post_proc_buffer); - vpx_free_frame_buffer(&cm->post_proc_buffer_int); -#else - (void)cm; -#endif -} - void vp10_free_context_buffers(VP10_COMMON *cm) { cm->free_mi(cm); free_seg_map(cm); diff --git a/vp10/common/mfqe.c b/vp10/common/mfqe.c deleted file mode 100644 index c715ef73e..000000000 --- a/vp10/common/mfqe.c +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vp10_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" - -#include "vp10/common/onyxc_int.h" -#include "vp10/common/postproc.h" - -// TODO(jackychen): Replace this function with SSE2 code. There is -// one SSE2 implementation in vp8, so will consider how to share it -// between vp8 and vp9. 
-static void filter_by_weight(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int block_size, int src_weight) { - const int dst_weight = (1 << MFQE_PRECISION) - src_weight; - const int rounding_bit = 1 << (MFQE_PRECISION - 1); - int r, c; - - for (r = 0; r < block_size; r++) { - for (c = 0; c < block_size; c++) { - dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) - >> MFQE_PRECISION; - } - src += src_stride; - dst += dst_stride; - } -} - -void vp10_filter_by_weight8x8_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int src_weight) { - filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight); -} - -void vp10_filter_by_weight16x16_c(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int src_weight) { - filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight); -} - -static void filter_by_weight32x32(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int weight) { - vp10_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight); - vp10_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride, - weight); - vp10_filter_by_weight16x16(src + src_stride * 16, src_stride, - dst + dst_stride * 16, dst_stride, weight); - vp10_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride, - dst + dst_stride * 16 + 16, dst_stride, weight); -} - -static void filter_by_weight64x64(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, int weight) { - filter_by_weight32x32(src, src_stride, dst, dst_stride, weight); - filter_by_weight32x32(src + 32, src_stride, dst + 32, - dst_stride, weight); - filter_by_weight32x32(src + src_stride * 32, src_stride, - dst + dst_stride * 32, dst_stride, weight); - filter_by_weight32x32(src + src_stride * 32 + 32, src_stride, - dst + dst_stride * 32 + 32, dst_stride, weight); -} - -static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd, - int yd_stride, const uint8_t *u, const uint8_t *v, - 
int uv_stride, uint8_t *ud, uint8_t *vd, - int uvd_stride, BLOCK_SIZE block_size, - int weight) { - if (block_size == BLOCK_16X16) { - vp10_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight); - vp10_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight); - vp10_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight); - } else if (block_size == BLOCK_32X32) { - filter_by_weight32x32(y, y_stride, yd, yd_stride, weight); - vp10_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight); - vp10_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight); - } else if (block_size == BLOCK_64X64) { - filter_by_weight64x64(y, y_stride, yd, yd_stride, weight); - filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight); - filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight); - } -} - -// TODO(jackychen): Determine whether replace it with assembly code. -static void copy_mem8x8(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride) { - int r; - for (r = 0; r < 8; r++) { - memcpy(dst, src, 8); - src += src_stride; - dst += dst_stride; - } -} - -static void copy_mem16x16(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride) { - int r; - for (r = 0; r < 16; r++) { - memcpy(dst, src, 16); - src += src_stride; - dst += dst_stride; - } -} - -static void copy_mem32x32(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride) { - copy_mem16x16(src, src_stride, dst, dst_stride); - copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride); - copy_mem16x16(src + src_stride * 16, src_stride, - dst + dst_stride * 16, dst_stride); - copy_mem16x16(src + src_stride * 16 + 16, src_stride, - dst + dst_stride * 16 + 16, dst_stride); -} - -void copy_mem64x64(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride) { - copy_mem32x32(src, src_stride, dst, dst_stride); - copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride); - copy_mem32x32(src + src_stride * 32, src_stride, - dst + src_stride * 32, dst_stride); - 
copy_mem32x32(src + src_stride * 32 + 32, src_stride, - dst + src_stride * 32 + 32, dst_stride); -} - -static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, - int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud, - uint8_t *vd, int yd_stride, int uvd_stride, - BLOCK_SIZE bs) { - if (bs == BLOCK_16X16) { - copy_mem16x16(y, y_stride, yd, yd_stride); - copy_mem8x8(u, uv_stride, ud, uvd_stride); - copy_mem8x8(v, uv_stride, vd, uvd_stride); - } else if (bs == BLOCK_32X32) { - copy_mem32x32(y, y_stride, yd, yd_stride); - copy_mem16x16(u, uv_stride, ud, uvd_stride); - copy_mem16x16(v, uv_stride, vd, uvd_stride); - } else { - copy_mem64x64(y, y_stride, yd, yd_stride); - copy_mem32x32(u, uv_stride, ud, uvd_stride); - copy_mem32x32(v, uv_stride, vd, uvd_stride); - } -} - -static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) { - const int adj = qdiff >> MFQE_PRECISION; - if (bs == BLOCK_16X16) { - *sad_thr = 7 + adj; - } else if (bs == BLOCK_32X32) { - *sad_thr = 6 + adj; - } else { // BLOCK_64X64 - *sad_thr = 5 + adj; - } - *vdiff_thr = 125 + qdiff; -} - -static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, - const uint8_t *v, int y_stride, int uv_stride, - uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride, - int uvd_stride, int qdiff) { - int sad, sad_thr, vdiff, vdiff_thr; - uint32_t sse; - - get_thr(bs, qdiff, &sad_thr, &vdiff_thr); - - if (bs == BLOCK_16X16) { - vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; - sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; - } else if (bs == BLOCK_32X32) { - vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; - sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10; - } else /* if (bs == BLOCK_64X64) */ { - vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; - sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12; - } - - // vdiff > sad * 3 means vdiff should not be 
too small, otherwise, - // it might be a lighting change in smooth area. When there is a - // lighting change in smooth area, it is dangerous to do MFQE. - if (sad > 1 && vdiff > sad * 3) { - const int weight = 1 << MFQE_PRECISION; - int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr); - // When ifactor equals weight, no MFQE is done. - if (ifactor > weight) { - ifactor = weight; - } - apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd, - uvd_stride, bs, ifactor); - } else { - // Copy the block from current frame (i.e., no mfqe is done). - copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, - yd_stride, uvd_stride, bs); - } -} - -static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) { - // Check the motion in current block(for inter frame), - // or check the motion in the correlated block in last frame (for keyframe). - const int mv_len_square = mi->mbmi.mv[0].as_mv.row * - mi->mbmi.mv[0].as_mv.row + - mi->mbmi.mv[0].as_mv.col * - mi->mbmi.mv[0].as_mv.col; - const int mv_threshold = 100; - return mi->mbmi.mode >= NEARESTMV && // Not an intra block - cur_bs >= BLOCK_16X16 && - mv_len_square <= mv_threshold; -} - -// Process each partiton in a super block, recursively. -static void mfqe_partition(VP10_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs, - const uint8_t *y, const uint8_t *u, - const uint8_t *v, int y_stride, int uv_stride, - uint8_t *yd, uint8_t *ud, uint8_t *vd, - int yd_stride, int uvd_stride) { - int mi_offset, y_offset, uv_offset; - const BLOCK_SIZE cur_bs = mi->mbmi.sb_type; - const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex; - const int bsl = b_width_log2_lookup[bs]; - PARTITION_TYPE partition = partition_lookup[bsl][cur_bs]; - const BLOCK_SIZE subsize = get_subsize(bs, partition); - - if (cur_bs < BLOCK_8X8) { - // If there are blocks smaller than 8x8, it must be on the boundary. 
- return; - } - // No MFQE on blocks smaller than 16x16 - if (bs == BLOCK_16X16) { - partition = PARTITION_NONE; - } - if (bs == BLOCK_64X64) { - mi_offset = 4; - y_offset = 32; - uv_offset = 16; - } else { - mi_offset = 2; - y_offset = 16; - uv_offset = 8; - } - switch (partition) { - BLOCK_SIZE mfqe_bs, bs_tmp; - case PARTITION_HORZ: - if (bs == BLOCK_64X64) { - mfqe_bs = BLOCK_64X32; - bs_tmp = BLOCK_32X32; - } else { - mfqe_bs = BLOCK_32X16; - bs_tmp = BLOCK_16X16; - } - if (mfqe_decision(mi, mfqe_bs)) { - // Do mfqe on the first square partition. - mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, - yd, ud, vd, yd_stride, uvd_stride, qdiff); - // Do mfqe on the second square partition. - mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, - y_stride, uv_stride, yd + y_offset, ud + uv_offset, - vd + uv_offset, yd_stride, uvd_stride, qdiff); - } - if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) { - // Do mfqe on the first square partition. - mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride, - v + uv_offset * uv_stride, y_stride, uv_stride, - yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, - vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff); - // Do mfqe on the second square partition. - mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset, - u + uv_offset * uv_stride + uv_offset, - v + uv_offset * uv_stride + uv_offset, y_stride, - uv_stride, yd + y_offset * yd_stride + y_offset, - ud + uv_offset * uvd_stride + uv_offset, - vd + uv_offset * uvd_stride + uv_offset, - yd_stride, uvd_stride, qdiff); - } - break; - case PARTITION_VERT: - if (bs == BLOCK_64X64) { - mfqe_bs = BLOCK_32X64; - bs_tmp = BLOCK_32X32; - } else { - mfqe_bs = BLOCK_16X32; - bs_tmp = BLOCK_16X16; - } - if (mfqe_decision(mi, mfqe_bs)) { - // Do mfqe on the first square partition. - mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, - yd, ud, vd, yd_stride, uvd_stride, qdiff); - // Do mfqe on the second square partition. 
- mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride, - v + uv_offset * uv_stride, y_stride, uv_stride, - yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, - vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff); - } - if (mfqe_decision(mi + mi_offset, mfqe_bs)) { - // Do mfqe on the first square partition. - mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, - y_stride, uv_stride, yd + y_offset, ud + uv_offset, - vd + uv_offset, yd_stride, uvd_stride, qdiff); - // Do mfqe on the second square partition. - mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset, - u + uv_offset * uv_stride + uv_offset, - v + uv_offset * uv_stride + uv_offset, y_stride, - uv_stride, yd + y_offset * yd_stride + y_offset, - ud + uv_offset * uvd_stride + uv_offset, - vd + uv_offset * uvd_stride + uv_offset, - yd_stride, uvd_stride, qdiff); - } - break; - case PARTITION_NONE: - if (mfqe_decision(mi, cur_bs)) { - // Do mfqe on this partition. - mfqe_block(cur_bs, y, u, v, y_stride, uv_stride, - yd, ud, vd, yd_stride, uvd_stride, qdiff); - } else { - // Copy the block from current frame(i.e., no mfqe is done). - copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, - yd_stride, uvd_stride, bs); - } - break; - case PARTITION_SPLIT: - // Recursion on four square partitions, e.g. if bs is 64X64, - // then look into four 32X32 blocks in it. 
- mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd, - yd_stride, uvd_stride); - mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset, - v + uv_offset, y_stride, uv_stride, yd + y_offset, - ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride); - mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize, - y + y_offset * y_stride, u + uv_offset * uv_stride, - v + uv_offset * uv_stride, y_stride, uv_stride, - yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, - vd + uv_offset * uvd_stride, yd_stride, uvd_stride); - mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset, - subsize, y + y_offset * y_stride + y_offset, - u + uv_offset * uv_stride + uv_offset, - v + uv_offset * uv_stride + uv_offset, y_stride, - uv_stride, yd + y_offset * yd_stride + y_offset, - ud + uv_offset * uvd_stride + uv_offset, - vd + uv_offset * uvd_stride + uv_offset, - yd_stride, uvd_stride); - break; - default: - assert(0); - } -} - -void vp10_mfqe(VP10_COMMON *cm) { - int mi_row, mi_col; - // Current decoded frame. - const YV12_BUFFER_CONFIG *show = cm->frame_to_show; - // Last decoded frame and will store the MFQE result. - YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; - // Loop through each super block. - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { - MODE_INFO *mi; - MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col); - // Motion Info in last frame. 
- MODE_INFO *mi_prev = cm->postproc_state.prev_mi + - (mi_row * cm->mi_stride + mi_col); - const uint32_t y_stride = show->y_stride; - const uint32_t uv_stride = show->uv_stride; - const uint32_t yd_stride = dest->y_stride; - const uint32_t uvd_stride = dest->uv_stride; - const uint32_t row_offset_y = mi_row << 3; - const uint32_t row_offset_uv = mi_row << 2; - const uint32_t col_offset_y = mi_col << 3; - const uint32_t col_offset_uv = mi_col << 2; - const uint8_t *y = show->y_buffer + row_offset_y * y_stride + - col_offset_y; - const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride + - col_offset_uv; - const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride + - col_offset_uv; - uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y; - uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride + - col_offset_uv; - uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride + - col_offset_uv; - if (frame_is_intra_only(cm)) { - mi = mi_prev; - } else { - mi = mi_local; - } - mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud, - vd, yd_stride, uvd_stride); - } - } -} diff --git a/vp10/common/mfqe.h b/vp10/common/mfqe.h deleted file mode 100644 index 7bedd119f..000000000 --- a/vp10/common/mfqe.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP10_COMMON_MFQE_H_ -#define VP10_COMMON_MFQE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -// Multiframe Quality Enhancement. -// The aim for MFQE is to replace pixel blocks in the current frame with -// the correlated pixel blocks (with higher quality) in the last frame. 
-// The replacement can only be taken in stationary blocks by checking -// the motion of the blocks and other conditions such as the SAD of -// the current block and correlated block, the variance of the block -// difference, etc. -void vp10_mfqe(struct VP10Common *cm); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP10_COMMON_MFQE_H_ diff --git a/vp10/common/mips/msa/mfqe_msa.c b/vp10/common/mips/msa/mfqe_msa.c deleted file mode 100644 index 3a593a1a1..000000000 --- a/vp10/common/mips/msa/mfqe_msa.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vp10_rtcd.h" -#include "vp10/common/onyxc_int.h" -#include "vpx_dsp/mips/macros_msa.h" - -static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride, - uint8_t *dst_ptr, int32_t dst_stride, - int32_t src_weight) { - int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; - int32_t row; - uint64_t src0_d, src1_d, dst0_d, dst1_d; - v16i8 src0 = { 0 }; - v16i8 src1 = { 0 }; - v16i8 dst0 = { 0 }; - v16i8 dst1 = { 0 }; - v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; - - src_wt = __msa_fill_h(src_weight); - dst_wt = __msa_fill_h(dst_weight); - - for (row = 2; row--;) { - LD2(src_ptr, src_stride, src0_d, src1_d); - src_ptr += (2 * src_stride); - LD2(dst_ptr, dst_stride, dst0_d, dst1_d); - INSERT_D2_SB(src0_d, src1_d, src0); - INSERT_D2_SB(dst0_d, dst1_d, dst0); - - LD2(src_ptr, src_stride, src0_d, src1_d); - src_ptr += (2 * src_stride); - LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d); - INSERT_D2_SB(src0_d, src1_d, src1); - INSERT_D2_SB(dst0_d, dst1_d, 
dst1); - - UNPCK_UB_SH(src0, src_r, src_l); - UNPCK_UB_SH(dst0, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); - ST8x2_UB(dst0, dst_ptr, dst_stride); - dst_ptr += (2 * dst_stride); - - UNPCK_UB_SH(src1, src_r, src_l); - UNPCK_UB_SH(dst1, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); - ST8x2_UB(dst1, dst_ptr, dst_stride); - dst_ptr += (2 * dst_stride); - } -} - -static void filter_by_weight16x16_msa(const uint8_t *src_ptr, - int32_t src_stride, - uint8_t *dst_ptr, - int32_t dst_stride, - int32_t src_weight) { - int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; - int32_t row; - v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; - v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; - - src_wt = __msa_fill_h(src_weight); - dst_wt = __msa_fill_h(dst_weight); - - for (row = 4; row--;) { - LD_SB4(src_ptr, src_stride, src0, src1, src2, src3); - src_ptr += (4 * src_stride); - LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3); - - UNPCK_UB_SH(src0, src_r, src_l); - UNPCK_UB_SH(dst0, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); - dst_ptr += dst_stride; - - UNPCK_UB_SH(src1, src_r, src_l); - UNPCK_UB_SH(dst1, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); - dst_ptr += dst_stride; - - 
UNPCK_UB_SH(src2, src_r, src_l); - UNPCK_UB_SH(dst2, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); - dst_ptr += dst_stride; - - UNPCK_UB_SH(src3, src_r, src_l); - UNPCK_UB_SH(dst3, dst_r, dst_l); - res_h_r = (src_r * src_wt); - res_h_r += (dst_r * dst_wt); - res_h_l = (src_l * src_wt); - res_h_l += (dst_l * dst_wt); - SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); - PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); - dst_ptr += dst_stride; - } -} - -void vp10_filter_by_weight8x8_msa(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int src_weight) { - filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight); -} - -void vp10_filter_by_weight16x16_msa(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int src_weight) { - filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight); -} diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index 22e16977d..701fad1e6 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -24,10 +24,6 @@ #include "vp10/common/quant_common.h" #include "vp10/common/tile_common.h" -#if CONFIG_VP9_POSTPROC -#include "vp10/common/postproc.h" -#endif - #ifdef __cplusplus extern "C" { #endif @@ -167,11 +163,6 @@ typedef struct VP10Common { int new_fb_idx; -#if CONFIG_VP9_POSTPROC - YV12_BUFFER_CONFIG post_proc_buffer; - YV12_BUFFER_CONFIG post_proc_buffer_int; -#endif - FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ FRAME_TYPE frame_type; @@ -275,10 +266,6 @@ typedef struct VP10Common { vpx_bit_depth_t bit_depth; vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer -#if CONFIG_VP9_POSTPROC - struct postproc_state postproc_state; -#endif - int error_resilient_mode; int log2_tile_cols, log2_tile_rows; diff --git a/vp10/common/postproc.c 
b/vp10/common/postproc.c deleted file mode 100644 index 4a43672fb..000000000 --- a/vp10/common/postproc.c +++ /dev/null @@ -1,746 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <math.h> -#include <stdlib.h> -#include <stdio.h> - -#include "./vpx_config.h" -#include "./vpx_scale_rtcd.h" -#include "./vp10_rtcd.h" - -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/system_state.h" -#include "vpx_scale/vpx_scale.h" -#include "vpx_scale/yv12config.h" - -#include "vp10/common/onyxc_int.h" -#include "vp10/common/postproc.h" -#include "vp10/common/textblit.h" - -#if CONFIG_VP9_POSTPROC -static const short kernel5[] = { - 1, 1, 4, 1, 1 -}; - -const short vp10_rv[] = { - 8, 5, 2, 2, 8, 12, 4, 9, 8, 3, - 0, 3, 9, 0, 0, 0, 8, 3, 14, 4, - 10, 1, 11, 14, 1, 14, 9, 6, 12, 11, - 8, 6, 10, 0, 0, 8, 9, 0, 3, 14, - 8, 11, 13, 4, 2, 9, 0, 3, 9, 6, - 1, 2, 3, 14, 13, 1, 8, 2, 9, 7, - 3, 3, 1, 13, 13, 6, 6, 5, 2, 7, - 11, 9, 11, 8, 7, 3, 2, 0, 13, 13, - 14, 4, 12, 5, 12, 10, 8, 10, 13, 10, - 4, 14, 4, 10, 0, 8, 11, 1, 13, 7, - 7, 14, 6, 14, 13, 2, 13, 5, 4, 4, - 0, 10, 0, 5, 13, 2, 12, 7, 11, 13, - 8, 0, 4, 10, 7, 2, 7, 2, 2, 5, - 3, 4, 7, 3, 3, 14, 14, 5, 9, 13, - 3, 14, 3, 6, 3, 0, 11, 8, 13, 1, - 13, 1, 12, 0, 10, 9, 7, 6, 2, 8, - 5, 2, 13, 7, 1, 13, 14, 7, 6, 7, - 9, 6, 10, 11, 7, 8, 7, 5, 14, 8, - 4, 4, 0, 8, 7, 10, 0, 8, 14, 11, - 3, 12, 5, 7, 14, 3, 14, 5, 2, 6, - 11, 12, 12, 8, 0, 11, 13, 1, 2, 0, - 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, - 0, 3, 10, 5, 8, 0, 11, 6, 7, 8, - 10, 7, 13, 9, 2, 5, 1, 5, 10, 2, - 4, 3, 5, 6, 10, 8, 9, 4, 11, 14, - 0, 10, 0, 5, 13, 2, 12, 7, 11, 13, - 8, 0, 4, 10, 7, 2, 7,
2, 2, 5, - 3, 4, 7, 3, 3, 14, 14, 5, 9, 13, - 3, 14, 3, 6, 3, 0, 11, 8, 13, 1, - 13, 1, 12, 0, 10, 9, 7, 6, 2, 8, - 5, 2, 13, 7, 1, 13, 14, 7, 6, 7, - 9, 6, 10, 11, 7, 8, 7, 5, 14, 8, - 4, 4, 0, 8, 7, 10, 0, 8, 14, 11, - 3, 12, 5, 7, 14, 3, 14, 5, 2, 6, - 11, 12, 12, 8, 0, 11, 13, 1, 2, 0, - 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, - 0, 3, 10, 5, 8, 0, 11, 6, 7, 8, - 10, 7, 13, 9, 2, 5, 1, 5, 10, 2, - 4, 3, 5, 6, 10, 8, 9, 4, 11, 14, - 3, 8, 3, 7, 8, 5, 11, 4, 12, 3, - 11, 9, 14, 8, 14, 13, 4, 3, 1, 2, - 14, 6, 5, 4, 4, 11, 4, 6, 2, 1, - 5, 8, 8, 12, 13, 5, 14, 10, 12, 13, - 0, 9, 5, 5, 11, 10, 13, 9, 10, 13, -}; - -static const uint8_t q_diff_thresh = 20; -static const uint8_t last_q_thresh = 170; - -void vp10_post_proc_down_and_across_c(const uint8_t *src_ptr, - uint8_t *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, - int rows, - int cols, - int flimit) { - uint8_t const *p_src; - uint8_t *p_dst; - int row, col, i, v, kernel; - int pitch = src_pixels_per_line; - uint8_t d[8]; - (void)dst_pixels_per_line; - - for (row = 0; row < rows; row++) { - /* post_proc_down for one row */ - p_src = src_ptr; - p_dst = dst_ptr; - - for (col = 0; col < cols; col++) { - kernel = 4; - v = p_src[col]; - - for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col + i * pitch]) > flimit) - goto down_skip_convolve; - - kernel += kernel5[2 + i] * p_src[col + i * pitch]; - } - - v = (kernel >> 3); - down_skip_convolve: - p_dst[col] = v; - } - - /* now post_proc_across */ - p_src = dst_ptr; - p_dst = dst_ptr; - - for (i = 0; i < 8; i++) - d[i] = p_src[i]; - - for (col = 0; col < cols; col++) { - kernel = 4; - v = p_src[col]; - - d[col & 7] = v; - - for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col + i]) > flimit) - goto across_skip_convolve; - - kernel += kernel5[2 + i] * p_src[col + i]; - } - - d[col & 7] = (kernel >> 3); - across_skip_convolve: - - if (col >= 2) - p_dst[col - 2] = d[(col - 2) & 7]; - } - - /* handle the last two pixels */ - p_dst[col - 2] = d[(col - 2) & 7]; - 
p_dst[col - 1] = d[(col - 1) & 7]; - - - /* next row */ - src_ptr += pitch; - dst_ptr += pitch; - } -} - -#if CONFIG_VPX_HIGHBITDEPTH -void vp10_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, - uint16_t *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, - int rows, - int cols, - int flimit) { - uint16_t const *p_src; - uint16_t *p_dst; - int row, col, i, v, kernel; - int pitch = src_pixels_per_line; - uint16_t d[8]; - - for (row = 0; row < rows; row++) { - // post_proc_down for one row. - p_src = src_ptr; - p_dst = dst_ptr; - - for (col = 0; col < cols; col++) { - kernel = 4; - v = p_src[col]; - - for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col + i * pitch]) > flimit) - goto down_skip_convolve; - - kernel += kernel5[2 + i] * p_src[col + i * pitch]; - } - - v = (kernel >> 3); - - down_skip_convolve: - p_dst[col] = v; - } - - /* now post_proc_across */ - p_src = dst_ptr; - p_dst = dst_ptr; - - for (i = 0; i < 8; i++) - d[i] = p_src[i]; - - for (col = 0; col < cols; col++) { - kernel = 4; - v = p_src[col]; - - d[col & 7] = v; - - for (i = -2; i <= 2; i++) { - if (abs(v - p_src[col + i]) > flimit) - goto across_skip_convolve; - - kernel += kernel5[2 + i] * p_src[col + i]; - } - - d[col & 7] = (kernel >> 3); - - across_skip_convolve: - if (col >= 2) - p_dst[col - 2] = d[(col - 2) & 7]; - } - - /* handle the last two pixels */ - p_dst[col - 2] = d[(col - 2) & 7]; - p_dst[col - 1] = d[(col - 1) & 7]; - - - /* next row */ - src_ptr += pitch; - dst_ptr += dst_pixels_per_line; - } -} -#endif // CONFIG_VPX_HIGHBITDEPTH - -static int q2mbl(int x) { - if (x < 20) x = 20; - - x = 50 + (x - 50) * 10 / 8; - return x * x / 3; -} - -void vp10_mbpost_proc_across_ip_c(uint8_t *src, int pitch, - int rows, int cols, int flimit) { - int r, c, i; - uint8_t *s = src; - uint8_t d[16]; - - for (r = 0; r < rows; r++) { - int sumsq = 0; - int sum = 0; - - for (i = -8; i <= 6; i++) { - sumsq += s[i] * s[i]; - sum += s[i]; - d[i + 8] = 0; - } - - for (c = 0; c < 
cols + 8; c++) { - int x = s[c + 7] - s[c - 8]; - int y = s[c + 7] + s[c - 8]; - - sum += x; - sumsq += x * y; - - d[c & 15] = s[c]; - - if (sumsq * 15 - sum * sum < flimit) { - d[c & 15] = (8 + sum + s[c]) >> 4; - } - - s[c - 8] = d[(c - 8) & 15]; - } - s += pitch; - } -} - -#if CONFIG_VPX_HIGHBITDEPTH -void vp10_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch, - int rows, int cols, int flimit) { - int r, c, i; - - uint16_t *s = src; - uint16_t d[16]; - - - for (r = 0; r < rows; r++) { - int sumsq = 0; - int sum = 0; - - for (i = -8; i <= 6; i++) { - sumsq += s[i] * s[i]; - sum += s[i]; - d[i + 8] = 0; - } - - for (c = 0; c < cols + 8; c++) { - int x = s[c + 7] - s[c - 8]; - int y = s[c + 7] + s[c - 8]; - - sum += x; - sumsq += x * y; - - d[c & 15] = s[c]; - - if (sumsq * 15 - sum * sum < flimit) { - d[c & 15] = (8 + sum + s[c]) >> 4; - } - - s[c - 8] = d[(c - 8) & 15]; - } - - s += pitch; - } -} -#endif // CONFIG_VPX_HIGHBITDEPTH - -void vp10_mbpost_proc_down_c(uint8_t *dst, int pitch, - int rows, int cols, int flimit) { - int r, c, i; - const short *rv3 = &vp10_rv[63 & rand()]; // NOLINT - - for (c = 0; c < cols; c++) { - uint8_t *s = &dst[c]; - int sumsq = 0; - int sum = 0; - uint8_t d[16]; - const short *rv2 = rv3 + ((c * 17) & 127); - - for (i = -8; i <= 6; i++) { - sumsq += s[i * pitch] * s[i * pitch]; - sum += s[i * pitch]; - } - - for (r = 0; r < rows + 8; r++) { - sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; - sum += s[7 * pitch] - s[-8 * pitch]; - d[r & 15] = s[0]; - - if (sumsq * 15 - sum * sum < flimit) { - d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4; - } - - s[-8 * pitch] = d[(r - 8) & 15]; - s += pitch; - } - } -} - -#if CONFIG_VPX_HIGHBITDEPTH -void vp10_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, - int rows, int cols, int flimit) { - int r, c, i; - const int16_t *rv3 = &vp10_rv[63 & rand()]; // NOLINT - - for (c = 0; c < cols; c++) { - uint16_t *s = &dst[c]; - int sumsq = 0; - int sum = 0; - uint16_t 
d[16]; - const int16_t *rv2 = rv3 + ((c * 17) & 127); - - for (i = -8; i <= 6; i++) { - sumsq += s[i * pitch] * s[i * pitch]; - sum += s[i * pitch]; - } - - for (r = 0; r < rows + 8; r++) { - sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; - sum += s[7 * pitch] - s[-8 * pitch]; - d[r & 15] = s[0]; - - if (sumsq * 15 - sum * sum < flimit) { - d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4; - } - - s[-8 * pitch] = d[(r - 8) & 15]; - s += pitch; - } - } -} -#endif // CONFIG_VPX_HIGHBITDEPTH - -static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, - int q, - int low_var_thresh, - int flag) { - double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; - int ppl = (int)(level + .5); - (void) low_var_thresh; - (void) flag; - -#if CONFIG_VPX_HIGHBITDEPTH - if (source->flags & YV12_FLAG_HIGHBITDEPTH) { - vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->y_buffer), - CONVERT_TO_SHORTPTR(post->y_buffer), - source->y_stride, post->y_stride, - source->y_height, source->y_width, - ppl); - - vp10_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer), - post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer), - post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->u_buffer), - CONVERT_TO_SHORTPTR(post->u_buffer), - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, - ppl); - vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->v_buffer), - CONVERT_TO_SHORTPTR(post->v_buffer), - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, - ppl); - } else { - vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer, - source->y_stride, post->y_stride, - source->y_height, source->y_width, ppl); - - vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, - 
post->y_width, q2mbl(q)); - - vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); - vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); - } -#else - vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer, - source->y_stride, post->y_stride, - source->y_height, source->y_width, ppl); - - vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, - post->y_width, q2mbl(q)); - - vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); - vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer, - source->uv_stride, post->uv_stride, - source->uv_height, source->uv_width, ppl); -#endif // CONFIG_VPX_HIGHBITDEPTH -} - -void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int q) { - const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q - + 0.0065 + 0.5); - int i; - - const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer}; - const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride}; - const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width}; - const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height}; - - uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; - const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { -#if CONFIG_VPX_HIGHBITDEPTH - assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == - (dst->flags & YV12_FLAG_HIGHBITDEPTH)); - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - 
vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(srcs[i]), - CONVERT_TO_SHORTPTR(dsts[i]), - src_strides[i], dst_strides[i], - src_heights[i], src_widths[i], ppl); - } else { - vp10_post_proc_down_and_across(srcs[i], dsts[i], - src_strides[i], dst_strides[i], - src_heights[i], src_widths[i], ppl); - } -#else - vp10_post_proc_down_and_across(srcs[i], dsts[i], - src_strides[i], dst_strides[i], - src_heights[i], src_widths[i], ppl); -#endif // CONFIG_VPX_HIGHBITDEPTH - } -} - -void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int q) { - const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q - + 0.0065 + 0.5); - int i; - - const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer}; - const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride}; - const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width}; - const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height}; - - uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; - const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - const int src_stride = src_strides[i]; - const int src_width = src_widths[i] - 4; - const int src_height = src_heights[i] - 4; - const int dst_stride = dst_strides[i]; - -#if CONFIG_VPX_HIGHBITDEPTH - assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == - (dst->flags & YV12_FLAG_HIGHBITDEPTH)); - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - const uint16_t *const src_plane = CONVERT_TO_SHORTPTR( - srcs[i] + 2 * src_stride + 2); - uint16_t *const dst_plane = CONVERT_TO_SHORTPTR( - dsts[i] + 2 * dst_stride + 2); - vp10_highbd_post_proc_down_and_across(src_plane, dst_plane, src_stride, - dst_stride, src_height, src_width, - ppl); - } else { - const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2; - uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2; - - vp10_post_proc_down_and_across(src_plane, 
dst_plane, src_stride, - dst_stride, src_height, src_width, ppl); - } -#else - const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2; - uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2; - vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride, dst_stride, - src_height, src_width, ppl); -#endif - } -} - -static double gaussian(double sigma, double mu, double x) { - return 1 / (sigma * sqrt(2.0 * 3.14159265)) * - (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma))); -} - -static void fillrd(struct postproc_state *state, int q, int a) { - char char_dist[300]; - - double sigma; - int ai = a, qi = q, i; - - vpx_clear_system_state(); - - sigma = ai + .5 + .6 * (63 - qi) / 63.0; - - /* set up a lookup table of 256 entries that matches - * a gaussian distribution with sigma determined by q. - */ - { - int next, j; - - next = 0; - - for (i = -32; i < 32; i++) { - int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i)); - - if (a_i) { - for (j = 0; j < a_i; j++) { - char_dist[next + j] = (char) i; - } - - next = next + j; - } - } - - for (; next < 256; next++) - char_dist[next] = 0; - } - - for (i = 0; i < 3072; i++) { - state->noise[i] = char_dist[rand() & 0xff]; // NOLINT - } - - for (i = 0; i < 16; i++) { - state->blackclamp[i] = -char_dist[0]; - state->whiteclamp[i] = -char_dist[0]; - state->bothclamp[i] = -2 * char_dist[0]; - } - - state->last_q = q; - state->last_noise = a; -} - -void vp10_plane_add_noise_c(uint8_t *start, char *noise, - char blackclamp[16], - char whiteclamp[16], - char bothclamp[16], - unsigned int width, unsigned int height, int pitch) { - unsigned int i, j; - - // TODO(jbb): why does simd code use both but c doesn't, normalize and - // fix.. 
- (void) bothclamp; - for (i = 0; i < height; i++) { - uint8_t *pos = start + i * pitch; - char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT - - for (j = 0; j < width; j++) { - if (pos[j] < blackclamp[0]) - pos[j] = blackclamp[0]; - - if (pos[j] > 255 + whiteclamp[0]) - pos[j] = 255 + whiteclamp[0]; - - pos[j] += ref[j]; - } - } -} - -static void swap_mi_and_prev_mi(VP10_COMMON *cm) { - // Current mip will be the prev_mip for the next frame. - MODE_INFO *temp = cm->postproc_state.prev_mip; - cm->postproc_state.prev_mip = cm->mip; - cm->mip = temp; - - // Update the upper left visible macroblock ptrs. - cm->mi = cm->mip + cm->mi_stride + 1; - cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1; -} - -int vp10_post_proc_frame(struct VP10Common *cm, - YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *ppflags) { - const int q = VPXMIN(105, cm->lf.filter_level * 2); - const int flags = ppflags->post_proc_flag; - YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer; - struct postproc_state *const ppstate = &cm->postproc_state; - - if (!cm->frame_to_show) - return -1; - - if (!flags) { - *dest = *cm->frame_to_show; - return 0; - } - - vpx_clear_system_state(); - - // Alloc memory for prev_mip in the first frame. - if (cm->current_video_frame == 1) { - cm->postproc_state.last_base_qindex = cm->base_qindex; - cm->postproc_state.last_frame_valid = 1; - ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip)); - if (!ppstate->prev_mip) { - return 1; - } - ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1; - memset(ppstate->prev_mip, 0, - cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); - } - - // Allocate post_proc_buffer_int if needed. 
- if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) { - if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { - const int width = ALIGN_POWER_OF_TWO(cm->width, 4); - const int height = ALIGN_POWER_OF_TWO(cm->height, 4); - - if (vpx_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VPX_HIGHBITDEPTH - cm->use_highbitdepth, -#endif // CONFIG_VPX_HIGHBITDEPTH - VPX_ENC_BORDER_IN_PIXELS, - cm->byte_alignment) < 0) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate MFQE framebuffer"); - } - - // Ensure that postproc is set to all 0s so that post proc - // doesn't pull random data in from edge. - memset(cm->post_proc_buffer_int.buffer_alloc, 128, - cm->post_proc_buffer.frame_size); - } - } - - if (vpx_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VPX_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment, - NULL, NULL, NULL) < 0) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate post-processing buffer"); - - if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 && - cm->postproc_state.last_frame_valid && cm->bit_depth == 8 && - cm->postproc_state.last_base_qindex <= last_q_thresh && - cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) { - vp10_mfqe(cm); - // TODO(jackychen): Consider whether enable deblocking by default - // if mfqe is enabled. Need to take both the quality and the speed - // into consideration. 
- if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { - vp8_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int); - } - if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) { - deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf, - q + (ppflags->deblocking_level - 5) * 10, - 1, 0); - } else if (flags & VP9D_DEBLOCK) { - vp10_deblock(&cm->post_proc_buffer_int, ppbuf, q); - } else { - vp8_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf); - } - } else if (flags & VP9D_DEMACROBLOCK) { - deblock_and_de_macro_block(cm->frame_to_show, ppbuf, - q + (ppflags->deblocking_level - 5) * 10, 1, 0); - } else if (flags & VP9D_DEBLOCK) { - vp10_deblock(cm->frame_to_show, ppbuf, q); - } else { - vp8_yv12_copy_frame(cm->frame_to_show, ppbuf); - } - - cm->postproc_state.last_base_qindex = cm->base_qindex; - cm->postproc_state.last_frame_valid = 1; - - if (flags & VP9D_ADDNOISE) { - const int noise_level = ppflags->noise_level; - if (ppstate->last_q != q || - ppstate->last_noise != noise_level) { - fillrd(ppstate, 63 - q, noise_level); - } - - vp10_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp, - ppstate->whiteclamp, ppstate->bothclamp, - ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride); - } - - *dest = *ppbuf; - - /* handle problem with extending borders */ - dest->y_width = cm->width; - dest->y_height = cm->height; - dest->uv_width = dest->y_width >> cm->subsampling_x; - dest->uv_height = dest->y_height >> cm->subsampling_y; - - swap_mi_and_prev_mi(cm); - return 0; -} -#endif // CONFIG_VP9_POSTPROC diff --git a/vp10/common/postproc.h b/vp10/common/postproc.h deleted file mode 100644 index e2ce0dcc8..000000000 --- a/vp10/common/postproc.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP10_COMMON_POSTPROC_H_ -#define VP10_COMMON_POSTPROC_H_ - -#include "vpx_ports/mem.h" -#include "vpx_scale/yv12config.h" -#include "vp10/common/blockd.h" -#include "vp10/common/mfqe.h" -#include "vp10/common/ppflags.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct postproc_state { - int last_q; - int last_noise; - char noise[3072]; - int last_base_qindex; - int last_frame_valid; - MODE_INFO *prev_mip; - MODE_INFO *prev_mi; - DECLARE_ALIGNED(16, char, blackclamp[16]); - DECLARE_ALIGNED(16, char, whiteclamp[16]); - DECLARE_ALIGNED(16, char, bothclamp[16]); -}; - -struct VP10Common; - -#define MFQE_PRECISION 4 - -int vp10_post_proc_frame(struct VP10Common *cm, - YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *flags); - -void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q); - -void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP10_COMMON_POSTPROC_H_ diff --git a/vp10/common/ppflags.h b/vp10/common/ppflags.h deleted file mode 100644 index 8592fe906..000000000 --- a/vp10/common/ppflags.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP10_COMMON_PPFLAGS_H_ -#define VP10_COMMON_PPFLAGS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - VP9D_NOFILTERING = 0, - VP9D_DEBLOCK = 1 << 0, - VP9D_DEMACROBLOCK = 1 << 1, - VP9D_ADDNOISE = 1 << 2, - VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3, - VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4, - VP9D_DEBUG_TXT_DC_DIFF = 1 << 5, - VP9D_DEBUG_TXT_RATE_INFO = 1 << 6, - VP9D_DEBUG_DRAW_MV = 1 << 7, - VP9D_DEBUG_CLR_BLK_MODES = 1 << 8, - VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9, - VP9D_MFQE = 1 << 10 -}; - -typedef struct { - int post_proc_flag; - int deblocking_level; - int noise_level; -} vp10_ppflags_t; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP10_COMMON_PPFLAGS_H_ diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl index bfb380d85..2551c7a7c 100644 --- a/vp10/common/vp10_rtcd_defs.pl +++ b/vp10/common/vp10_rtcd_defs.pl @@ -54,33 +54,6 @@ if ($opts{arch} eq "x86_64") { $avx2_x86_64 = 'avx2'; } -# -# post proc -# -if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { -add_proto qw/void vp10_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit"; -specialize qw/vp10_mbpost_proc_down sse2/; -$vp10_mbpost_proc_down_sse2=vp10_mbpost_proc_down_xmm; - -add_proto qw/void vp10_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit"; -specialize qw/vp10_mbpost_proc_across_ip sse2/; -$vp10_mbpost_proc_across_ip_sse2=vp10_mbpost_proc_across_ip_xmm; - -add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; -specialize qw/vp10_post_proc_down_and_across sse2/; -$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm; - -add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; -specialize qw/vp10_plane_add_noise sse2/; 
-$vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt; - -add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; -specialize qw/vp10_filter_by_weight16x16 sse2 msa/; - -add_proto qw/void vp10_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; -specialize qw/vp10_filter_by_weight8x8 sse2 msa/; -} - # # dct # diff --git a/vp10/common/x86/mfqe_sse2.asm b/vp10/common/x86/mfqe_sse2.asm deleted file mode 100644 index e714d06db..000000000 --- a/vp10/common/x86/mfqe_sse2.asm +++ /dev/null @@ -1,287 +0,0 @@ -; -; Copyright (c) 2015 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -; This file is a duplicate of mfqe_sse2.asm in VP8. -; TODO(jackychen): Find a way to fix the duplicate. 
-%include "vpx_ports/x86_abi_support.asm" - -;void vp10_filter_by_weight16x16_sse2 -;( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride, -; int src_weight -;) -global sym(vp10_filter_by_weight16x16_sse2) PRIVATE -sym(vp10_filter_by_weight16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movd xmm0, arg(4) ; src_weight - pshuflw xmm0, xmm0, 0x0 ; replicate to all low words - punpcklqdq xmm0, xmm0 ; replicate to all hi words - - movdqa xmm1, [GLOBAL(tMFQE)] - psubw xmm1, xmm0 ; dst_weight - - mov rax, arg(0) ; src - mov rsi, arg(1) ; src_stride - mov rdx, arg(2) ; dst - mov rdi, arg(3) ; dst_stride - - mov rcx, 16 ; loop count - pxor xmm6, xmm6 - -.combine - movdqa xmm2, [rax] - movdqa xmm4, [rdx] - add rax, rsi - - ; src * src_weight - movdqa xmm3, xmm2 - punpcklbw xmm2, xmm6 - punpckhbw xmm3, xmm6 - pmullw xmm2, xmm0 - pmullw xmm3, xmm0 - - ; dst * dst_weight - movdqa xmm5, xmm4 - punpcklbw xmm4, xmm6 - punpckhbw xmm5, xmm6 - pmullw xmm4, xmm1 - pmullw xmm5, xmm1 - - ; sum, round and shift - paddw xmm2, xmm4 - paddw xmm3, xmm5 - paddw xmm2, [GLOBAL(tMFQE_round)] - paddw xmm3, [GLOBAL(tMFQE_round)] - psrlw xmm2, 4 - psrlw xmm3, 4 - - packuswb xmm2, xmm3 - movdqa [rdx], xmm2 - add rdx, rdi - - dec rcx - jnz .combine - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - - ret - -;void vp10_filter_by_weight8x8_sse2 -;( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride, -; int src_weight -;) -global sym(vp10_filter_by_weight8x8_sse2) PRIVATE -sym(vp10_filter_by_weight8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movd xmm0, arg(4) ; src_weight - pshuflw xmm0, xmm0, 0x0 ; replicate to all low words - punpcklqdq xmm0, xmm0 ; replicate to all hi words - - movdqa xmm1, [GLOBAL(tMFQE)] - psubw xmm1, xmm0 ; dst_weight - - 
mov rax, arg(0) ; src - mov rsi, arg(1) ; src_stride - mov rdx, arg(2) ; dst - mov rdi, arg(3) ; dst_stride - - mov rcx, 8 ; loop count - pxor xmm4, xmm4 - -.combine - movq xmm2, [rax] - movq xmm3, [rdx] - add rax, rsi - - ; src * src_weight - punpcklbw xmm2, xmm4 - pmullw xmm2, xmm0 - - ; dst * dst_weight - punpcklbw xmm3, xmm4 - pmullw xmm3, xmm1 - - ; sum, round and shift - paddw xmm2, xmm3 - paddw xmm2, [GLOBAL(tMFQE_round)] - psrlw xmm2, 4 - - packuswb xmm2, xmm4 - movq [rdx], xmm2 - add rdx, rdi - - dec rcx - jnz .combine - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - - ret - -;void vp10_variance_and_sad_16x16_sse2 | arg -;( -; unsigned char *src1, 0 -; int stride1, 1 -; unsigned char *src2, 2 -; int stride2, 3 -; unsigned int *variance, 4 -; unsigned int *sad, 5 -;) -global sym(vp10_variance_and_sad_16x16_sse2) PRIVATE -sym(vp10_variance_and_sad_16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rax, arg(0) ; src1 - mov rcx, arg(1) ; stride1 - mov rdx, arg(2) ; src2 - mov rdi, arg(3) ; stride2 - - mov rsi, 16 ; block height - - ; Prep accumulator registers - pxor xmm3, xmm3 ; SAD - pxor xmm4, xmm4 ; sum of src2 - pxor xmm5, xmm5 ; sum of src2^2 - - ; Because we're working with the actual output frames - ; we can't depend on any kind of data alignment. -.accumulate - movdqa xmm0, [rax] ; src1 - movdqa xmm1, [rdx] ; src2 - add rax, rcx ; src1 + stride1 - add rdx, rdi ; src2 + stride2 - - ; SAD(src1, src2) - psadbw xmm0, xmm1 - paddusw xmm3, xmm0 - - ; SUM(src2) - pxor xmm2, xmm2 - psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0 - paddusw xmm4, xmm2 - - ; pmaddubsw would be ideal if it took two unsigned values. instead, - ; it expects a signed and an unsigned value. so instead we zero extend - ; and operate on words. 
- pxor xmm2, xmm2 - movdqa xmm0, xmm1 - punpcklbw xmm0, xmm2 - punpckhbw xmm1, xmm2 - pmaddwd xmm0, xmm0 - pmaddwd xmm1, xmm1 - paddd xmm5, xmm0 - paddd xmm5, xmm1 - - sub rsi, 1 - jnz .accumulate - - ; phaddd only operates on adjacent double words. - ; Finalize SAD and store - movdqa xmm0, xmm3 - psrldq xmm0, 8 - paddusw xmm0, xmm3 - paddd xmm0, [GLOBAL(t128)] - psrld xmm0, 8 - - mov rax, arg(5) - movd [rax], xmm0 - - ; Accumulate sum of src2 - movdqa xmm0, xmm4 - psrldq xmm0, 8 - paddusw xmm0, xmm4 - ; Square src2. Ignore high value - pmuludq xmm0, xmm0 - psrld xmm0, 8 - - ; phaddw could be used to sum adjacent values but we want - ; all the values summed. promote to doubles, accumulate, - ; shift and sum - pxor xmm2, xmm2 - movdqa xmm1, xmm5 - punpckldq xmm1, xmm2 - punpckhdq xmm5, xmm2 - paddd xmm1, xmm5 - movdqa xmm2, xmm1 - psrldq xmm1, 8 - paddd xmm1, xmm2 - - psubd xmm1, xmm0 - - ; (variance + 128) >> 8 - paddd xmm1, [GLOBAL(t128)] - psrld xmm1, 8 - mov rax, arg(4) - - movd [rax], xmm1 - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -t128: -%ifndef __NASM_VER__ - ddq 128 -%elif CONFIG_BIG_ENDIAN - dq 0, 128 -%else - dq 128, 0 -%endif -align 16 -tMFQE: ; 1 << MFQE_PRECISION - times 8 dw 0x10 -align 16 -tMFQE_round: ; 1 << (MFQE_PRECISION - 1) - times 8 dw 0x08 diff --git a/vp10/common/x86/postproc_sse2.asm b/vp10/common/x86/postproc_sse2.asm deleted file mode 100644 index d5f8e927b..000000000 --- a/vp10/common/x86/postproc_sse2.asm +++ /dev/null @@ -1,694 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp10_post_proc_down_and_across_xmm -;( -; unsigned char *src_ptr, -; unsigned char *dst_ptr, -; int src_pixels_per_line, -; int dst_pixels_per_line, -; int rows, -; int cols, -; int flimit -;) -global sym(vp10_post_proc_down_and_across_xmm) PRIVATE -sym(vp10_post_proc_down_and_across_xmm): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -%if ABI_IS_32BIT=1 && CONFIG_PIC=1 - ALIGN_STACK 16, rax - ; move the global rd onto the stack, since we don't have enough registers - ; to do PIC addressing - movdqa xmm0, [GLOBAL(rd42)] - sub rsp, 16 - movdqa [rsp], xmm0 -%define RD42 [rsp] -%else -%define RD42 [GLOBAL(rd42)] -%endif - - - movd xmm2, dword ptr arg(6) ;flimit - punpcklwd xmm2, xmm2 - punpckldq xmm2, xmm2 - punpcklqdq xmm2, xmm2 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;dst_ptr - - movsxd rcx, DWORD PTR arg(4) ;rows - movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch? 
- pxor xmm0, xmm0 ; mm0 = 00000000 - -.nextrow: - - xor rdx, rdx ; clear out rdx for use as loop counter -.nextcol: - movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7 - punpcklbw xmm3, xmm0 ; mm3 = p0..p3 - movdqa xmm1, xmm3 ; mm1 = p0..p3 - psllw xmm3, 2 ; - - movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm6 - - ; thresholding - movdqa xmm7, xmm1 ; mm7 = r0 p0..p3 - psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3 - paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3) - pcmpgtw xmm7, xmm2 - - movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - - neg rax - movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7 - punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3 - psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7 - punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3 - paddusw xmm3, xmm4 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = r0 p0..p3 - psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3 - psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3 - paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - - paddusw xmm3, RD42 ; mm3 += round value - psraw xmm3, 3 ; mm3 /= 8 - - pand xmm1, xmm7 ; mm1 select vals > thresh from source - pandn xmm7, 
xmm3 ; mm7 select vals < thresh from blurred result - paddusw xmm1, xmm7 ; combination - - packuswb xmm1, xmm0 ; pack to bytes - movq QWORD PTR [rdi], xmm1 ; - - neg rax ; pitch is positive - add rsi, 8 - add rdi, 8 - - add rdx, 8 - cmp edx, dword arg(5) ;cols - - jl .nextcol - - ; done with the all cols, start the across filtering in place - sub rsi, rdx - sub rdi, rdx - - xor rdx, rdx - movq mm0, QWORD PTR [rdi-8]; - -.acrossnextcol: - movq xmm7, QWORD PTR [rdi +rdx -2] - movd xmm4, DWORD PTR [rdi +rdx +6] - - pslldq xmm4, 8 - por xmm4, xmm7 - - movdqa xmm3, xmm4 - psrldq xmm3, 2 - punpcklbw xmm3, xmm0 ; mm3 = p0..p3 - movdqa xmm1, xmm3 ; mm1 = p0..p3 - psllw xmm3, 2 - - - movdqa xmm5, xmm4 - psrldq xmm5, 3 - punpcklbw xmm5, xmm0 ; mm5 = p1..p4 - paddusw xmm3, xmm5 ; mm3 += mm6 - - ; thresholding - movdqa xmm7, xmm1 ; mm7 = p0..p3 - psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4) - pcmpgtw xmm7, xmm2 - - movdqa xmm5, xmm4 - psrldq xmm5, 4 - punpcklbw xmm5, xmm0 ; mm5 = p2..p5 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - - movdqa xmm5, xmm4 ; mm5 = p-2..p5 - punpcklbw xmm5, xmm0 ; mm5 = p-2..p1 - paddusw xmm3, xmm5 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4 - psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3 - paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - psrldq xmm4, 1 ; mm4 = p-1..p5 - punpcklbw xmm4, xmm0 ; mm4 = p-1..p2 - paddusw xmm3, xmm4 ; mm3 += mm5 - - ; thresholding - movdqa xmm6, xmm1 ; mm6 = p0..p3 - psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4 - psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3 - 
paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4) - pcmpgtw xmm6, xmm2 - por xmm7, xmm6 ; accumulate thresholds - - paddusw xmm3, RD42 ; mm3 += round value - psraw xmm3, 3 ; mm3 /= 8 - - pand xmm1, xmm7 ; mm1 select vals > thresh from source - pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result - paddusw xmm1, xmm7 ; combination - - packuswb xmm1, xmm0 ; pack to bytes - movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes - movdq2q mm0, xmm1 - - add rdx, 8 - cmp edx, dword arg(5) ;cols - jl .acrossnextcol; - - ; last 8 pixels - movq QWORD PTR [rdi+rdx-8], mm0 - - ; done with this rwo - add rsi,rax ; next line - mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch? - add rdi,rax ; next destination - mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch? - - dec rcx ; decrement count - jnz .nextrow ; next row - -%if ABI_IS_32BIT=1 && CONFIG_PIC=1 - add rsp,16 - pop rsp -%endif - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -%undef RD42 - - -;void vp10_mbpost_proc_down_xmm(unsigned char *dst, -; int pitch, int rows, int cols,int flimit) -extern sym(vp10_rv) -global sym(vp10_mbpost_proc_down_xmm) PRIVATE -sym(vp10_mbpost_proc_down_xmm): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 128+16 - - ; unsigned char d[16][8] at [rsp] - ; create flimit2 at [rsp+128] - mov eax, dword ptr arg(4) ;flimit - mov [rsp+128], eax - mov [rsp+128+4], eax - mov [rsp+128+8], eax - mov [rsp+128+12], eax -%define flimit4 [rsp+128] - -%if ABI_IS_32BIT=0 - lea r8, [GLOBAL(sym(vp10_rv))] -%endif - - ;rows +=8; - add dword arg(2), 8 - - ;for(c=0; ccommon; /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the @@ -413,13 +410,9 @@ int vp10_receive_compressed_data(VP10Decoder *pbi, return retcode; } -int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp10_ppflags_t *flags) { 
+int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd) { VP10_COMMON *const cm = &pbi->common; int ret = -1; -#if !CONFIG_VP9_POSTPROC - (void)*flags; -#endif if (pbi->ready_for_new_data == 1) return ret; @@ -432,17 +425,8 @@ int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd, pbi->ready_for_new_data = 1; -#if CONFIG_VP9_POSTPROC - if (!cm->show_existing_frame) { - ret = vp10_post_proc_frame(cm, sd, flags); - } else { - *sd = *cm->frame_to_show; - ret = 0; - } -#else *sd = *cm->frame_to_show; ret = 0; -#endif /*!CONFIG_POSTPROC*/ vpx_clear_system_state(); return ret; } diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h index 72a631020..52a56f044 100644 --- a/vp10/decoder/decoder.h +++ b/vp10/decoder/decoder.h @@ -20,7 +20,6 @@ #include "vp10/common/thread_common.h" #include "vp10/common/onyxc_int.h" -#include "vp10/common/ppflags.h" #include "vp10/decoder/dthread.h" #ifdef __cplusplus @@ -85,8 +84,7 @@ typedef struct VP10Decoder { int vp10_receive_compressed_data(struct VP10Decoder *pbi, size_t size, const uint8_t **dest); -int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp10_ppflags_t *flags); +int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd); vpx_codec_err_t vp10_copy_reference_dec(struct VP10Decoder *pbi, VP9_REFFRAME ref_frame_flag, diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 9931a0e0b..2236f34f3 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -17,9 +17,6 @@ #include "vp10/common/alloccommon.h" #include "vp10/common/filter.h" #include "vp10/common/idct.h" -#if CONFIG_VP9_POSTPROC -#include "vp10/common/postproc.h" -#endif #include "vp10/common/reconinter.h" #include "vp10/common/reconintra.h" #include "vp10/common/tile_common.h" @@ -375,9 +372,6 @@ static void dealloc_compressor_data(VP10_COMP *cpi) { cpi->active_map.map = NULL; vp10_free_ref_frame_buffers(cm->buffer_pool); -#if CONFIG_VP9_POSTPROC - vp10_free_postproc_buffers(cm); 
-#endif vp10_free_context_buffers(cm); vpx_free_frame_buffer(&cpi->last_frame_uf); @@ -1969,9 +1963,6 @@ void vp10_remove_compressor(VP10_COMP *cpi) { vp10_remove_common(cm); vp10_free_ref_frame_buffers(cm->buffer_pool); -#if CONFIG_VP9_POSTPROC - vp10_free_postproc_buffers(cm); -#endif vpx_free(cpi); #if CONFIG_VP9_TEMPORAL_DENOISING @@ -2961,31 +2952,6 @@ static void set_size_dependent_vars(VP10_COMP *cpi, int *q, // lagged coding, and if the relevant speed feature flag is set. if (oxcf->pass == 2 && cpi->sf.static_segmentation) configure_static_seg_features(cpi); - -#if CONFIG_VP9_POSTPROC - if (oxcf->noise_sensitivity > 0) { - int l = 0; - switch (oxcf->noise_sensitivity) { - case 1: - l = 20; - break; - case 2: - l = 40; - break; - case 3: - l = 60; - break; - case 4: - case 5: - l = 100; - break; - case 6: - l = 150; - break; - } - vp10_denoise(cpi->Source, cpi->Source, l); - } -#endif // CONFIG_VP9_POSTPROC } static void init_motion_estimation(VP10_COMP *cpi) { @@ -4169,22 +4135,6 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags, { PSNR_STATS psnr2; double frame_ssim2 = 0, weight = 0; -#if CONFIG_VP9_POSTPROC - if (vpx_alloc_frame_buffer(&cm->post_proc_buffer, - recon->y_crop_width, recon->y_crop_height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VPX_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VPX_ENC_BORDER_IN_PIXELS, - cm->byte_alignment) < 0) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate post processing buffer"); - } - - vp10_deblock(cm->frame_to_show, &cm->post_proc_buffer, - cm->lf.filter_level * 10 / 6); -#endif vpx_clear_system_state(); #if CONFIG_VPX_HIGHBITDEPTH @@ -4315,20 +4265,13 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags, return 0; } -int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest, - vp10_ppflags_t *flags) { +int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest) { VP10_COMMON *cm = &cpi->common; -#if 
!CONFIG_VP9_POSTPROC - (void)flags; -#endif if (!cm->show_frame) { return -1; } else { int ret; -#if CONFIG_VP9_POSTPROC - ret = vp10_post_proc_frame(cm, dest, flags); -#else if (cm->frame_to_show) { *dest = *cm->frame_to_show; dest->y_width = cm->width; @@ -4339,7 +4282,6 @@ int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest, } else { ret = -1; } -#endif // !CONFIG_VP9_POSTPROC vpx_clear_system_state(); return ret; } diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index 9c074dda4..575449d91 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -17,7 +17,6 @@ #include "vpx/vp8cx.h" #include "vp10/common/alloccommon.h" -#include "vp10/common/ppflags.h" #include "vp10/common/entropymode.h" #include "vp10/common/thread_common.h" #include "vp10/common/onyxc_int.h" @@ -514,8 +513,7 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush); -int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest, - vp10_ppflags_t *flags); +int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest); int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags); diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c index d9ccc9567..51c732452 100644 --- a/vp10/encoder/temporal_filter.c +++ b/vp10/encoder/temporal_filter.c @@ -313,7 +313,7 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, for (mb_row = 0; mb_row < mb_rows; mb_row++) { // Source frames are extended to 16 pixels. This is different than - // L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS) + // L/A/G reference frames that have a border of 32 (VPXENCBORDERINPIXELS) // A 6/8 tap filter is used for motion search. This requires 2 pixels // before and 3 pixels after. So the largest Y mv on a border would // then be 16 - VPX_INTERP_EXTEND. 
The UV blocks are half the size of the diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk index c8bcdf1be..874b99e03 100644 --- a/vp10/vp10_common.mk +++ b/vp10/vp10_common.mk @@ -10,7 +10,6 @@ VP10_COMMON_SRCS-yes += vp10_common.mk VP10_COMMON_SRCS-yes += vp10_iface_common.h -VP10_COMMON_SRCS-yes += common/ppflags.h VP10_COMMON_SRCS-yes += common/alloccommon.c VP10_COMMON_SRCS-yes += common/blockd.c VP10_COMMON_SRCS-yes += common/debugmodes.c @@ -64,15 +63,6 @@ VP10_COMMON_SRCS-yes += common/scan.h VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c -VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h -VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c -VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.h -VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.c -ifeq ($(CONFIG_VP9_POSTPROC),yes) -VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm -VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm -endif - ifneq ($(CONFIG_VPX_HIGHBITDEPTH),yes) VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans4_dspr2.c VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans8_dspr2.c @@ -84,10 +74,6 @@ VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct4x4_msa.c VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct8x8_msa.c VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct16x16_msa.c -ifeq ($(CONFIG_VP9_POSTPROC),yes) -VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c -endif - VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_sse2.c VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_dct32x32_impl_sse2.h diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c index 9a393ae9c..dc5912a98 100644 --- a/vp10/vp10_cx_iface.c +++ b/vp10/vp10_cx_iface.c @@ -1121,34 +1121,16 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t 
*ctx, va_list args) { -#if CONFIG_VP9_POSTPROC - vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *); - if (config != NULL) { - ctx->preview_ppcfg = *config; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } -#else (void)ctx; (void)args; return VPX_CODEC_INCAPABLE; -#endif } static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; - vp10_ppflags_t flags; - vp10_zero(flags); - if (ctx->preview_ppcfg.post_proc_flag) { - flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; - flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; - flags.noise_level = ctx->preview_ppcfg.noise_level; - } - - if (vp10_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) { + if (vp10_get_preview_raw_frame(ctx->cpi, &sd) == 0) { yuvconfig2image(&ctx->preview_img, &sd, NULL); return &ctx->preview_img; } else { diff --git a/vp10/vp10_dx_iface.c b/vp10/vp10_dx_iface.c index 07d218965..c3b216ddc 100644 --- a/vp10/vp10_dx_iface.c +++ b/vp10/vp10_dx_iface.c @@ -29,7 +29,7 @@ #include "vp10/vp10_iface_common.h" -#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? 
VPX_CODEC_CAP_POSTPROC : 0) +#define VP9_CAP_POSTPROC 0 typedef vpx_codec_stream_info_t vp10_stream_info_t; @@ -119,9 +119,6 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { (FrameWorkerData *)worker->data1; vpx_get_worker_interface()->end(worker); vp10_remove_common(&frame_worker_data->pbi->common); -#if CONFIG_VP9_POSTPROC - vp10_free_postproc_buffers(&frame_worker_data->pbi->common); -#endif vp10_decoder_remove(frame_worker_data->pbi); vpx_free(frame_worker_data->scratch_buffer); #if CONFIG_MULTITHREAD @@ -313,15 +310,6 @@ static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { cfg->noise_level = 0; } -static void set_ppflags(const vpx_codec_alg_priv_t *ctx, - vp10_ppflags_t *flags) { - flags->post_proc_flag = - ctx->postproc_cfg.post_proc_flag; - - flags->deblocking_level = ctx->postproc_cfg.deblocking_level; - flags->noise_level = ctx->postproc_cfg.noise_level; -} - static int frame_worker_hook(void *arg1, void *arg2) { FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1; const uint8_t *data = frame_worker_data->data; @@ -554,7 +542,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; - vp10_ppflags_t flags = {0, 0, 0}; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; @@ -567,7 +554,7 @@ static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { check_resync(ctx, frame_worker_data->pbi); - if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) { VP10_COMMON *const cm = &frame_worker_data->pbi->common; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx; @@ -746,7 +733,6 @@ static 
vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, if (*iter == NULL && ctx->frame_workers != NULL) { do { YV12_BUFFER_CONFIG sd; - vp10_ppflags_t flags = {0, 0, 0}; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; @@ -754,8 +740,6 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, (FrameWorkerData *)worker->data1; ctx->next_output_worker_id = (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - set_ppflags(ctx, &flags); // Wait for the frame from worker thread. if (winterface->sync(worker)) { // Check if worker has received any frames. @@ -764,7 +748,7 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, frame_worker_data->received_frame = 0; check_resync(ctx, frame_worker_data->pbi); } - if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) { VP10_COMMON *const cm = &frame_worker_data->pbi->common; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; release_last_output_frame(ctx); @@ -878,21 +862,9 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, va_list args) { -#if CONFIG_VP9_POSTPROC - vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); - - if (data) { - ctx->postproc_cfg_set = 1; - ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } -#else (void)ctx; (void)args; return VPX_CODEC_INCAPABLE; -#endif } static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk index 6939847ce..eb92a68af 100644 --- a/vp10/vp10cx.mk +++ b/vp10/vp10cx.mk @@ -77,10 +77,6 @@ VP10_CX_SRCS-yes += encoder/aq_complexity.c VP10_CX_SRCS-yes += encoder/aq_complexity.h VP10_CX_SRCS-yes += 
encoder/skin_detection.c VP10_CX_SRCS-yes += encoder/skin_detection.h -ifeq ($(CONFIG_VP9_POSTPROC),yes) -VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h -VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c -endif VP10_CX_SRCS-yes += encoder/temporal_filter.c VP10_CX_SRCS-yes += encoder/temporal_filter.h VP10_CX_SRCS-yes += encoder/mbgraph.c diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm index 708fa101c..197be7653 100644 --- a/vpx_ports/x86_abi_support.asm +++ b/vpx_ports/x86_abi_support.asm @@ -393,7 +393,7 @@ section .text ; On Android platforms use lrand48 when building postproc routines. Prior to L ; rand() was not available. -%if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1 +%if CONFIG_POSTPROC=1 %ifdef __ANDROID__ extern sym(lrand48) %define LIBVPX_RAND lrand48